diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 56e6179a664e26..55ed9c36f6c5cd 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -46,9 +46,8 @@
 //
 // - ElBits is the size of one element in bits.
 //
-// - NF enumerates the number of sub-vectors.
-//   TODO: Tuple types are represented as a concatination of "NumEls x ElBits"
-//   vectors. This will be changed to become a struct containing NF vectors.
+// - NF enumerates the number of vectors whereby 1 implies a single vector,
+//   with other values implying a struct of NF "NumEls x ElBits" vectors.
 //
 // - IsSigned is true for vectors of signed integer elements and
 //   for vectors of floating-point elements.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9950c06a0b9a6b..ca2d68012c42cd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9990,31 +9990,6 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
                                           SmallVectorImpl<Value *> &Ops,
                                           unsigned IntID) {
   llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
-
-  unsigned N;
-  switch (IntID) {
-  case Intrinsic::aarch64_sve_ld2_sret:
-  case Intrinsic::aarch64_sve_ld1_pn_x2:
-  case Intrinsic::aarch64_sve_ldnt1_pn_x2:
-  case Intrinsic::aarch64_sve_ld2q_sret:
-    N = 2;
-    break;
-  case Intrinsic::aarch64_sve_ld3_sret:
-  case Intrinsic::aarch64_sve_ld3q_sret:
-    N = 3;
-    break;
-  case Intrinsic::aarch64_sve_ld4_sret:
-  case Intrinsic::aarch64_sve_ld1_pn_x4:
-  case Intrinsic::aarch64_sve_ldnt1_pn_x4:
-  case Intrinsic::aarch64_sve_ld4q_sret:
-    N = 4;
-    break;
-  default:
-    llvm_unreachable("unknown intrinsic!");
-  }
-  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
-                                     VTy->getElementCount() * N);
-
   Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
   Value *BasePtr = Ops[1];
 
@@ -10023,15 +9998,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
 
   Function *F = CGM.getIntrinsic(IntID, {VTy});
-  Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
-  unsigned MinElts = VTy->getMinNumElements();
-  Value *Ret = llvm::PoisonValue::get(RetTy);
-  for (unsigned I = 0; I < N; I++) {
-    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
-    Value *SRet = Builder.CreateExtractValue(Call, I);
-    Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
-  }
-  return Ret;
+  return Builder.CreateCall(F, {Predicate, BasePtr});
 }
 
 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
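(Aside, not part of the diff: with the struct-based representation, a multi-vector load result is simply the intrinsic's native sret value, so consumers read sub-vectors with one extractvalue instead of the deleted concatenate-and-slice loop. A minimal sketch, assuming an IRBuilder<> named Builder; getSubVector is a hypothetical helper, not a name from this patch.)

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Reads sub-vector I of an ld2/ld3/ld4-style call that now returns a
// literal struct such as { <vscale x 4 x i32>, <vscale x 4 x i32> }.
Value *getSubVector(IRBuilder<> &Builder, Value *StructLoadCall, unsigned I) {
  // Previously this meant an @llvm.vector.extract at offset I * MinElts of
  // one wide concatenated vector; now it is a plain aggregate access.
  return Builder.CreateExtractValue(StructLoadCall, I);
}
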
@@ -10304,6 +10271,19 @@ Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
   // view (when storing/reloading), whereas the svreinterpret builtin
   // implements bitwise equivalent cast from register point of view.
   // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
+
+  if (auto *StructTy = dyn_cast<llvm::StructType>(Ty)) {
+    Value *Tuple = llvm::PoisonValue::get(Ty);
+
+    for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
+      Value *In = Builder.CreateExtractValue(Val, I);
+      Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
+      Tuple = Builder.CreateInsertValue(Tuple, Out, I);
+    }
+
+    return Tuple;
+  }
+
   return Builder.CreateBitCast(Val, Ty);
 }
 
@@ -10346,44 +10326,26 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
 }
 
 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
-                                             llvm::Type *Ty,
                                              ArrayRef<Value *> Ops) {
   assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
          "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
-
-  unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
-  auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
-      TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
-
-  if (!SingleVecTy)
-    return nullptr;
-
-  Value *Idx = ConstantInt::get(CGM.Int64Ty,
-                                I * SingleVecTy->getMinNumElements());
+  unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
 
   if (TypeFlags.isTupleSet())
-    return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
-  return Builder.CreateExtractVector(Ty, Ops[0], Idx);
+    return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
+  return Builder.CreateExtractValue(Ops[0], Idx);
 }
 
 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
-                                              llvm::Type *Ty,
-                                              ArrayRef<Value *> Ops) {
+                                           llvm::Type *Ty,
+                                           ArrayRef<Value *> Ops) {
   assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");
-  auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
-
-  if (!SrcTy)
-    return nullptr;
+  Value *Tuple = llvm::PoisonValue::get(Ty);
+  for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
+    Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
 
-  unsigned MinElts = SrcTy->getMinNumElements();
-  Value *Call = llvm::PoisonValue::get(Ty);
-  for (unsigned I = 0; I < Ops.size(); I++) {
-    Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
-    Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
-  }
-
-  return Call;
+  return Tuple;
 }
 
 Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
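(Aside: the user-visible operations whose lowering the tuple hunks above implement. A hedged sketch, valid as C or C++ with SVE available; the overloaded svcreate2/svget4 forms are standard ACLE spellings.)

#include <arm_sve.h>

svint32x2_t make_pair(svint32_t a, svint32_t b) {
  return svcreate2(a, b);  // now two insertvalues into a poison struct
}

svint32_t third_of_four(svint32x4_t t) {
  return svget4(t, 2);     // now a single extractvalue at index 2
}
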
@@ -10453,27 +10415,14 @@ void CodeGenFunction::GetAArch64SVEProcessedOperands(
       continue;
     }
 
-    if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
-      Ops.push_back(Arg);
-      continue;
-    }
+    if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
+      for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
+        Ops.push_back(Builder.CreateExtractValue(Arg, I));
 
-    auto *VTy = cast<ScalableVectorType>(Arg->getType());
-    unsigned MinElts = VTy->getMinNumElements();
-    bool IsPred = VTy->getElementType()->isIntegerTy(1);
-    unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
-
-    if (N == 1) {
-      Ops.push_back(Arg);
       continue;
     }
 
-    for (unsigned I = 0; I < N; ++I) {
-      Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
-      auto *NewVTy =
-          ScalableVectorType::get(VTy->getElementType(), MinElts / N);
-      Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
-    }
+    Ops.push_back(Arg);
   }
 }
 
@@ -10511,7 +10460,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
   else if (TypeFlags.isStructStore())
     return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
-    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
+    return EmitSVETupleSetOrGet(TypeFlags, Ops);
   else if (TypeFlags.isTupleCreate())
     return EmitSVETupleCreate(TypeFlags, Ty, Ops);
   else if (TypeFlags.isUndef())
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 4eca770ca35d85..7ea8cfd83cd7d4 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4628,7 +4628,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags);
   llvm::ScalableVectorType *getSVEPredType(const SVETypeFlags &TypeFlags);
   llvm::Value *EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
-                                    llvm::Type *ReturnType,
                                     ArrayRef<llvm::Value *> Ops);
   llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
                                   llvm::Type *ReturnType,
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 5eebd8ad2a0653..d087e714882b33 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -509,9 +509,20 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
     {
       ASTContext::BuiltinVectorTypeInfo Info =
           Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
-      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
-                                           Info.EC.getKnownMinValue() *
-                                               Info.NumVectors);
+      auto *VTy =
+          llvm::VectorType::get(ConvertType(Info.ElementType), Info.EC);
+      switch (Info.NumVectors) {
+      default:
+        llvm_unreachable("Expected 1, 2, 3 or 4 vectors!");
+      case 1:
+        return VTy;
+      case 2:
+        return llvm::StructType::get(VTy, VTy);
+      case 3:
+        return llvm::StructType::get(VTy, VTy, VTy);
+      case 4:
+        return llvm::StructType::get(VTy, VTy, VTy, VTy);
+      }
     }
   case BuiltinType::SveCount:
     return llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
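(Aside: what the ConvertType change means concretely. A sketch assuming 32-bit elements and a x2 tuple; int32x2Ty is an illustrative helper, not part of the patch.)

#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// svint32x2_t used to lower to one wide <vscale x 8 x i32>; it now lowers
// to an anonymous literal struct of the two sub-vectors.
StructType *int32x2Ty(LLVMContext &Ctx) {
  auto *VTy = ScalableVectorType::get(Type::getInt32Ty(Ctx), /*MinNumElts=*/4);
  return StructType::get(VTy, VTy); // { <vscale x 4 x i32>, <vscale x 4 x i32> }
}
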
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
index 135bcf743e5e6a..7e7597f82136ca 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
@@ -24,16 +24,12 @@
 // x2
 // CHECK-LABEL: @test_svadd_write_single2_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s32j11svint32x2_tu11__SVInt32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_inout("za") {
@@ -42,16 +38,12 @@
 // CHECK-LABEL: @test_svadd_write_single2_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u32j12svuint32x2_tu12__SVUint32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") {
@@ -60,16 +52,12 @@
 // CHECK-LABEL: @test_svadd_write_single2_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s64j11svint64x2_tu11__SVInt64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_inout("za") {
@@ -78,16 +66,12 @@
 // CHECK-LABEL: @test_svadd_write_single2_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u64j12svuint64x2_tu12__SVUint64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") {
@@ -98,20 +82,12 @@
 // CHECK-LABEL: @test_svadd_write_single4_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s32j11svint32x4_tu11__SVInt32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_inout("za") {
@@ -120,20 +96,12 @@
 // CHECK-LABEL: @test_svadd_write_single4_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u32j12svuint32x4_tu12__SVUint32_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") {
@@ -142,20 +110,12 @@
 // CHECK-LABEL: @test_svadd_write_single4_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s64j11svint64x4_tu11__SVInt64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_inout("za") {
@@ -164,20 +124,12 @@
 // CHECK-LABEL: @test_svadd_write_single4_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u64j12svuint64x4_tu12__SVUint64_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") {
@@ -192,20 +144,12 @@
 // CHECK-LABEL: @test_svadd_write_multi2_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s32j11svint32x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_inout("za") {
@@ -214,20 +158,12 @@
 // CHECK-LABEL: @test_svadd_write_multi2_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u32j12svuint32x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_inout("za") {
@@ -236,20 +172,12 @@
 // CHECK-LABEL: @test_svadd_write_multi2_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s64j11svint64x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_inout("za") {
@@ -258,20 +186,12 @@
 // CHECK-LABEL: @test_svadd_write_multi2_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u64j12svuint64x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_inout("za") {
@@ -282,28 +202,12 @@
 // CHECK-LABEL: @test_svadd_write_multi4_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE2:%.*]], <vscale x 4 x i32> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s32j11svint32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE2:%.*]], <vscale x 4 x i32> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_inout("za") {
@@ -312,28 +216,12 @@
 // CHECK-LABEL: @test_svadd_write_multi4_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE2:%.*]], <vscale x 4 x i32> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u32j12svuint32x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZM]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE2:%.*]], <vscale x 4 x i32> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_inout("za") {
@@ -342,28 +230,12 @@
 // CHECK-LABEL: @test_svadd_write_multi4_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s64j11svint64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_inout("za") {
@@ -372,28 +244,12 @@
 // CHECK-LABEL: @test_svadd_write_multi4_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u64j12svuint64x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_inout("za") {
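(Aside: the multi-vector tests above also show the argument-side effect of the new representation: each tuple parameter now arrives pre-split, so a vg1x4 zn/zm pair contributes eight coerced vector arguments instead of two wide concatenated ones. A hedged user-level sketch of one such caller; the non-overloaded intrinsic spelling follows the ACLE naming these tests exercise and should be treated as illustrative. Compiles as C or C++ with SME2 enabled.)

#include <arm_sme.h>

void add_multi4(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm)
    __arm_streaming __arm_inout("za") {
  // zn and zm each lower to four <vscale x 4 x i32> arguments.
  svadd_write_za32_s32_vg1x4(slice_base, zn, zm);
}
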
@@ -408,16 +264,12 @@ void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x
 // CHECK-LABEL: @test_svadd_za32_vg1x2_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_f32j13svfloat32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -426,16 +278,12 @@
 // CHECK-LABEL: @test_svadd_za32_vg1x2_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_s32j11svint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -444,16 +292,12 @@
 // CHECK-LABEL: @test_svadd_za32_vg1x2_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_u32j12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -462,16 +306,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x2_f64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_f64j13svfloat64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_inout("za") {
@@ -480,16 +320,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x2_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_s64j11svint64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_inout("za") {
@@ -498,16 +334,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x2_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_u64j12svuint64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_inout("za") {
@@ -518,20 +350,12 @@
 // CHECK-LABEL: @test_svadd_za32_vg1x4_f32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_f32j13svfloat32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_inout("za") {
@@ -540,20 +364,12 @@
 // CHECK-LABEL: @test_svadd_za32_vg1x4_s32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_s32j11svint32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_inout("za") {
@@ -562,20 +378,12 @@
 // CHECK-LABEL: @test_svadd_za32_vg1x4_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_u32j12svuint32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_inout("za") {
@@ -584,20 +392,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x4_f64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_f64j13svfloat64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]], <vscale x 2 x double> [[ZN_COERCE2:%.*]], <vscale x 2 x double> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_inout("za") {
@@ -606,20 +406,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x4_s64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_s64j11svint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_inout("za") {
@@ -628,20 +420,12 @@
 // CHECK-LABEL: @test_svadd_za64_vg1x4_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_u64j12svuint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_inout("za") {
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
index c88c6978719115..6bd9eab5f1846b 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -17,19 +17,15 @@
 #endif
 
 // CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]])
 // CHECK-NEXT: ret void
 //
 // CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]])
 // CHECK-CXX-NEXT: ret void
 //
 void test_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") {
@@ -37,23 +33,15 @@ void test_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming
 }
 
 // CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_f16(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]]
{ +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x4_f16j13svfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") { @@ -61,19 +49,15 @@ void test_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming } // CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x2_f16j13svfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.sub.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za") { @@ -81,23 +65,15 @@ void test_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming } // CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svsub_za16_vg1x4_f16j13svfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za") { @@ -105,19 +81,15 @@ void test_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming } // CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x2_bf16j14svbfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// 
CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svadd_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") { @@ -125,23 +97,15 @@ void test_svadd_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streami } // CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x4_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svadd_za16_vg1x4_bf16j14svbfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.add.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svadd_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming __arm_inout("za") { @@ -149,19 +113,15 @@ void test_svadd_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streami } // CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x2_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x2_bf16j14svbfloat16x2_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svsub_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streaming __arm_inout("za") { @@ -169,23 +129,15 @@ void test_svsub_za16_vg1x2_bf16(uint32_t slice, svbfloat16x2_t zn) __arm_streami } // CHECK-LABEL: define dso_local void @test_svsub_za16_vg1x4_bf16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svsub_za16_vg1x4_bf16j14svbfloat16x4_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sub.za16.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svsub_za16_vg1x4_bf16(uint32_t slice, svbfloat16x4_t zn) __arm_streaming 
__arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c index 972a6582998838..9c639984305d1b 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c @@ -24,25 +24,27 @@ // CHECK-LABEL: @test_svclamp_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , 
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
index 972a6582998838..9c639984305d1b 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_clamp.c
@@ -24,25 +24,27 @@
 // CHECK-LABEL: @test_svclamp_single_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x210svint8x2_tu10__SVInt8_tS0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
 //
 svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s8_x2, , )(op1, op2, op3);
@@ -50,25 +52,27 @@ svint8x2_t test_svclamp_single_s8_x2(svint8x2_t op1, svint8_t op2, svint8_t op3)
 // CHECK-LABEL: @test_svclamp_single_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x211svint16x2_tu11__SVInt16_tS0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
 //
 svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t op2, svint16_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s16_x2, , )(op1, op2, op3);
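
A usage note for the x2 clamp tests in this file: the tuple result is now literally the two-element struct returned by the intrinsic, so the remaining vector.insert/alloca/store/load sequence in the CHECK lines is only return-value coercion that later passes can fold away. A hedged sketch (hypothetical example; assumes the ACLE names svclamp_single_s16_x2 and svget2_s16):

#include <arm_sme.h>

// Hypothetical example: extract one vector of the clamped tuple. With the
// struct representation, svget2_s16 corresponds to a plain extractvalue on
// the two-element struct result rather than an llvm.vector.extract at a
// vector-length-scaled offset.
svint16_t clamp_low_half(svint16x2_t op, svint16_t lo, svint16_t hi)
    __arm_streaming {
  svint16x2_t r = svclamp_single_s16_x2(op, lo, hi);
  return svget2_s16(r, 0); // first vector of the pair
}
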
@@ -76,25 +80,27 @@ svint16x2_t test_svclamp_single_s16_x2(svint16x2_t op1, svint16_t
 // CHECK-LABEL: @test_svclamp_single_s32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x211svint32x2_tu11__SVInt32_tS0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
 //
 svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t op2, svint32_t op3) __arm_streaming {
   return SVE_ACLE_FUNC(svclamp, _single_s32_x2, , )(op1, op2, op3);
@@ -102,25 +108,27 @@ svint32x2_t test_svclamp_single_s32_x2(svint32x2_t op1, svint32_t
 // CHECK-LABEL: @test_svclamp_single_s64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x211svint64x2_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x2, , )(op1, op2, op3); @@ -131,37 +139,35 @@ svint64x2_t test_svclamp_single_s64_x2(svint64x2_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_s8_x410svint8x4_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( 
[[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s8_x4, , )(op1, op2, op3); @@ -169,37 +175,35 @@ svint8x4_t test_svclamp_single_s8_x4(svint8x4_t op1, svint8_t op2, svint8_t op3) // CHECK-LABEL: @test_svclamp_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], 
[[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s16_x411svint16x4_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s16_x4, , )(op1, op2, op3); @@ -207,37 +211,35 @@ svint16x4_t test_svclamp_single_s16_x4(svint16x4_t op1, svint16_t op2, svint16_t // CHECK-LABEL: @test_svclamp_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 12) -// CHECK-NEXT: 
[[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s32_x411svint32x4_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.sclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s32_x4, , )(op1, op2, op3); @@ -245,37 +247,35 @@ svint32x4_t test_svclamp_single_s32_x4(svint32x4_t op1, svint32_t op2, svint32_t // CHECK-LABEL: @test_svclamp_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_s64_x411svint64x4_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_s64_x4, , )(op1, op2, op3); @@ -288,25 +288,27 @@ svint64x4_t test_svclamp_single_s64_x4(svint64x4_t op1, svint64_t op2, svint64_t // CHECK-LABEL: @test_svclamp_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x211svuint8x2_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x2, , )(op1, op2, op3); @@ -314,25 +316,27 @@ svuint8x2_t test_svclamp_single_u8_x2(svuint8x2_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x212svuint16x2_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x2, , )(op1, op2, op3); @@ -340,25 +344,27 @@ svuint16x2_t test_svclamp_single_u16_x2(svuint16x2_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x212svuint32x2_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x2, , )(op1, op2, op3); @@ -366,25 +372,27 @@ svuint32x2_t test_svclamp_single_u32_x2(svuint32x2_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x212svuint64x2_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x2, , )(op1, op2, op3); @@ -395,37 +403,35 @@ svuint64x2_t test_svclamp_single_u64_x2(svuint64x2_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) 
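// (editor note, not generated FileCheck output: every hunk in this file makes
// the same change. Before the patch a tuple argument such as svuint8x4_t
// reached the builtin as one wide <vscale x 64 x i8> value that had to be
// sliced with @llvm.vector.extract, as in the removed lines around this note;
// after it, the tuple arrives as four separate <vscale x 16 x i8> parts,
// [[OP1_COERCE0]]..[[OP1_COERCE3]], fed straight to
// @llvm.aarch64.sve.uclamp.single.x4.nxv16i8. A minimal C sketch of the
// builtin under test, assuming arm_sme.h and a streaming caller; the wrapper
// name clamp_u8_x4 is hypothetical:
//
//   #include <arm_sme.h>
//   svuint8x4_t clamp_u8_x4(svuint8x4_t v, svuint8_t lo, svuint8_t hi)
//       __arm_streaming {
//     return svclamp_single_u8_x4(v, lo, hi);  // per-vector clamp to [lo, hi]
//   }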
-// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svclamp_single_u8_x411svuint8x4_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[OP1]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], 
[[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u8_x4, , )(op1, op2, op3); @@ -433,37 +439,35 @@ svuint8x4_t test_svclamp_single_u8_x4(svuint8x4_t op1, svuint8_t op2, svuint8_t // CHECK-LABEL: @test_svclamp_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u16_x412svuint16x4_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u16_x4, , )(op1, op2, op3); @@ -471,37 +475,35 @@ svuint16x4_t test_svclamp_single_u16_x4(svuint16x4_t op1, svuint16_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u32_x412svuint32x4_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// 
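// (editor note: each extractvalue/@llvm.vector.insert pair in this added block repacks one <vscale x 4 x i32> result part into the wide <vscale x 16 x i32> value before it is stored to [[RETVAL]]; the i64 offsets 0/4/8/12 are element positions, one subvector apart.)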
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u32_x4, , )(op1, op2, op3); @@ -509,37 +511,35 @@ svuint32x4_t test_svclamp_single_u32_x4(svuint32x4_t op1, svuint32_t op2, svuint // CHECK-LABEL: @test_svclamp_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_u64_x412svuint64x4_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_u64_x4, , )(op1, op2, op3); @@ -552,25 +552,27 @@ svuint64x4_t test_svclamp_single_u64_x4(svuint64x4_t op1, svuint64_t op2, svuint // CHECK-LABEL: @test_svclamp_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// 
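// (editor note: the two-element struct returned by fclamp is repacked into a single <vscale x 16 x half> here only so it can be stored to [[RETVAL]] and reloaded as the struct return value of the test function.)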
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x213svfloat16x2_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x2, , )(op1, op2, op3); @@ -578,25 +580,27 @@ svfloat16x2_t test_svclamp_single_f16_x2(svfloat16x2_t op1, svfloat16_t op2, svf // CHECK-LABEL: @test_svclamp_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr 
[[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x213svfloat32x2_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x2, , )(op1, op2, op3); @@ -605,25 +609,27 @@ svfloat32x2_t test_svclamp_single_f32_x2(svfloat32x2_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x213svfloat64x2_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x2, , )(op1, op2, op3); @@ -634,37 +640,35 @@ svfloat64x2_t test_svclamp_single_f64_x2(svfloat64x2_t op1, svfloat64_t op2, svf // CHECK-LABEL: @test_svclamp_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// 
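// (editor note: same repacking pattern as the x2 case above, now over four <vscale x 8 x half> parts at element offsets 0/8/16/24 of the nxv32f16 value.)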
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f16_x413svfloat16x4_tu13__SVFloat16_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[OP1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svfloat16_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f16_x4, , )(op1, op2, op3); @@ -672,37 +676,35 @@ svfloat16x4_t test_svclamp_single_f16_x4(svfloat16x4_t op1, svfloat16_t op2, svf // CHECK-LABEL: 
@test_svclamp_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f32_x413svfloat32x4_tu13__SVFloat32_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[OP1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 
2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svfloat32_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f32_x4, , )(op1, op2, op3); @@ -710,37 +712,35 @@ svfloat32x4_t test_svclamp_single_f32_x4(svfloat32x4_t op1, svfloat32_t op2, svf // CHECK-LABEL: @test_svclamp_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// 
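// (editor note: for nxv2f64 the insert offsets step by 2 (0/2/4/6) because @llvm.vector.insert indices count elements of the wide <vscale x 8 x double> vector, not bytes.)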
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svclamp_single_f64_x413svfloat64x4_tu13__SVFloat64_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[OP1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[OP2:%.*]], [[OP3:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( [[OP1_COERCE0:%.*]], [[OP1_COERCE1:%.*]], [[OP1_COERCE2:%.*]], [[OP1_COERCE3:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t op2, svfloat64_t op3) __arm_streaming { return SVE_ACLE_FUNC(svclamp, _single_f64_x4, , )(op1, op2, op3); @@ -748,25 +748,27 @@ svfloat64x4_t test_svclamp_single_f64_x4(svfloat64x4_t op1, svfloat64_t 
op2, svf
// CHECK-LABEL: @test_svclamp_single_bf16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[OP1:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[OP1]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_tS0_(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[OP1:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[OP1]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming {
  return SVE_ACLE_FUNC(svclamp, _single_bf16_x2, , )(op1, op2, op3);
@@ -774,37 +776,35 @@ svbfloat16x2_t test_svclamp_single_bf16_x2(svbfloat16x2_t op1, svbfloat16_t op2,
// CHECK-LABEL: @test_svclamp_single_bf16_x4(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 24)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP12]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE2:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE3:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
+// CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z27test_svclamp_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_tS0_(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[OP1]], i64 24)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[OP1_COERCE0:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE1:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE2:%.*]], <vscale x 8 x bfloat> [[OP1_COERCE3:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
//
svbfloat16x4_t test_svclamp_single_bf16_x4(svbfloat16x4_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming {
  return SVE_ACLE_FUNC(svclamp, _single_bf16_x4, , )(op1, op2, op3);
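Every regenerated block above follows the same shape: the old IR glued the tuple members into one double- or quadruple-width vector with llvm.vector.extract/llvm.vector.insert, while the new IR receives each member as its own coerced argument (OP1_COERCE0, OP1_COERCE1, ...) and returns the intrinsic's result as a literal struct through a RETVAL alloca. As a rough compile-only sketch of the kind of C source behind these checks (the wrapper name is ours, not from the patch; assumes arm_sme.h and a toolchain with SME2 and the bf16 clamp feature enabled):

#include <arm_sme.h>

// Clamp both halves of a bfloat16x2 tuple to [lo, hi] element-wise.
// Illustrative only: under the new lowering, 'vals' reaches the callee
// as two separate <vscale x 8 x bfloat> parameters rather than one
// <vscale x 16 x bfloat> value.
svbfloat16x2_t clamp_pair_bf16(svbfloat16x2_t vals, svbfloat16_t lo,
                               svbfloat16_t hi) __arm_streaming {
  return svclamp_single_bf16_x2(vals, lo, hi);
}
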
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
index 686e4f4f99afe9..2d61670fd60493 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c
@@ -19,17 +19,13 @@
// CHECK-LABEL: @test_cvt_f16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z15test_cvt_f16_x213svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_cvt_f16_x2(svfloat32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvt_f16,_f32_x2,,)(zn);
@@ -37,17 +33,13 @@ svfloat16_t test_cvt_f16_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_cvt_bf16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_cvt_bf16_x213svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvt_bf16,_f32_x2,,)(zn);
@@ -57,25 +49,27 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming {
// x2
// CHECK-LABEL: @test_svcvt_f32_u32_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x212svuint32x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x2,,)(zn); @@ -83,25 +77,27 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { return 
SVE_ACLE_FUNC(svcvt_f32,_s32_x2,,)(zn); @@ -109,25 +105,27 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x2,,)(zn); @@ -135,25 +133,27 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { 
, } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x2,,)(zn); @@ -162,37 +162,35 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // x4 // CHECK-LABEL: @test_svcvt_f32_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_u32_x4,,)(zn); @@ -200,37 +198,35 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_f32_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_f32_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_f32,_s32_x4,,)(zn); @@ -238,37 +234,35 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_u32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_u32,_f32_x4,,)(zn); @@ -276,37 +270,35 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svcvt_s32_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svcvt_s32,_f32_x4,,)(zn); @@ -314,17 +306,13 @@ svint32x4_t test_svcvt_s32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_s16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_s16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_qcvt_s16_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_s16,_s32_x2,,)(zn); @@ -332,17 +320,13 @@ svint16_t test_qcvt_s16_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u16_u32_x2( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_u16_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvt_u16_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_u32_x2,,)(zn); @@ -350,17 +334,13 @@ svuint16_t test_qcvt_u16_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_u16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvt_u16_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_s32_x2,,)(zn); @@ -368,21 +348,13 @@ svuint16_t test_qcvt_u16_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u8_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_qcvt_u8_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_qcvt_u8_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u8,_u32_x4,,)(zn); @@ -390,21 +362,13 @@ svuint8_t test_qcvt_u8_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u16_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_u16_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvt.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvt_u16_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_u64_x4,,)(zn); @@ -412,21 +376,13 @@ svuint16_t test_qcvt_u16_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_s8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_qcvt_s8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_qcvt_s8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_s8,_s32_x4,,)(zn); @@ -434,21 +390,13 @@ svint8_t test_qcvt_s8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_s16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_s16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvt.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_qcvt_s16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_s16,_s64_x4,,)(zn); @@ -456,21 +404,13 @@ svint16_t test_qcvt_s16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z19test_qcvt_u8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u8,_s32_x4,,)(zn); @@ -478,21 +418,13 @@ svuint8_t test_qcvt_u8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvt_u16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvt_u16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtu.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvt_u16,_s64_x4,,)(zn); @@ -500,21 +432,27 @@ svuint16_t test_qcvt_u16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_cvt_f32_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } 
[[TMP5]]
//
// CPP-CHECK-LABEL: @_Z15test_cvt_f32_x2u13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvt.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
__attribute__((target("sme-f16f16"))) svfloat32x2_t test_cvt_f32_x2(svfloat16_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvt_f32,_f16_x2,,)(zn);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
index 453dd3db6adf09..fc5c0376e925e0 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtl.c
@@ -19,21 +19,27 @@
// CHECK-LABEL: @test_cvtl_f32_x2(
// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z16test_cvtl_f32_x2u13__SVFloat16_t(
// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fcvtl.widen.x2.nxv4f32(<vscale x 8 x half> [[ZN:%.*]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
svfloat32x2_t test_cvtl_f32_x2(svfloat16_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvtl_f32,_f16_x2,,)(zn);
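The widening converts in acle_sme2_cvt.c and acle_sme2_cvtl.c above show the return-side half of the same change in isolation: a single input vector, with the x2 result now coming back as { <vscale x 4 x float>, <vscale x 4 x float> } instead of one <vscale x 8 x float>. A compile-only sketch (the wrapper name is ours; the target attribute mirrors the one test_cvt_f32_x2 uses and is an assumption here):

#include <arm_sme.h>

// Widen one f16 vector into an f32x2 tuple; illustrative only. The
// tuple is returned as a literal struct of two <vscale x 4 x float>
// values under the new lowering.
__attribute__((target("sme-f16f16")))
svfloat32x2_t widen_f16_to_f32_x2(svfloat16_t zn) __arm_streaming {
  return svcvtl_f32_f16_x2(zn);
}
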
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtn.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtn.c
index 78aeb67babf4c9..b38bf6fd350846 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtn.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvtn.c
@@ -19,17 +19,13 @@
// CHECK-LABEL: @test_cvtn_f16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_cvtn_f16_x213svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
//
svfloat16_t test_cvtn_f16_x2(svfloat32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvtn_f16,_f32_x2,,)(zn);
@@ -37,17 +33,13 @@ svfloat16_t test_cvtn_f16_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_cvtn_bf16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_cvtn_bf16_x213svfloat32x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
//
svbfloat16_t test_cvtn_bf16_x2(svfloat32x2_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svcvtn_bf16,_f32_x2,,)(zn);
@@ -55,21 +47,13 @@ svbfloat16_t test_cvtn_bf16_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_qcvtn_u8_u32_x4(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP4]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z20test_qcvtn_u8_u32_x412svuint32x4_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 
12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_qcvtn_u8_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_u8,_u32_x4,,)(zn); @@ -77,21 +61,13 @@ svuint8_t test_qcvtn_u8_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvtn_u16_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_u16_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvtn_u16_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_u16,_u64_x4,,)(zn); @@ -99,21 +75,13 @@ svuint16_t test_qcvtn_u16_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvtn_s8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z20test_qcvtn_s8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_qcvtn_s8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_s8,_s32_x4,,)(zn); @@ -121,21 +89,13 @@ svint8_t test_qcvtn_s8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvtn_s16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_s16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_qcvtn_s16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_s16,_s64_x4,,)(zn); @@ -143,21 +103,13 @@ svint16_t test_qcvtn_s16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvtn_u8_32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_qcvtn_u8_32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_qcvtn_u8_32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_u8,_s32_x4,,)(zn); @@ -165,21 +117,13 @@ svuint8_t test_qcvtn_u8_32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_qcvtn_u16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_u16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvtn_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqcvtn_u16,_s64_x4,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c index 5d026f8cde5e05..a1540bba2a8a96 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c @@ -19,29 +19,27 @@ // CHECK-LABEL: @test_svamax_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famax.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) 
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z18test_svamax_f16_x213svfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svamax,_f16_x2)(zdn, zm);
@@ -49,29 +47,27 @@ svfloat16x2_t test_svamax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre
// CHECK-LABEL: @test_svamax_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z18test_svamax_f32_x213svfloat32x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svamax,_f32_x2)(zdn, zm);
@@ -79,29 +75,27 @@ svfloat32x2_t test_svamax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre
// CHECK-LABEL: @test_svamax_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z18test_svamax_f64_x213svfloat64x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famax.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
//
svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svamax,_f64_x2)(zdn, zm);
@@ -109,29 +103,27 @@ svfloat64x2_t test_svamax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre
// CHECK-LABEL: @test_svamin_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call
{ , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x2)(zdn, zm); @@ -139,29 +131,27 @@ svfloat16x2_t test_svamin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x2)(zdn, zm); @@ -169,29 +159,27 @@ svfloat32x2_t test_svamin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.famin.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f64_x2)(zdn, zm); @@ -201,45 +189,35 @@ svfloat64x2_t test_svamin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f16_x4)(zdn, zm); @@ -247,45 +225,35 @@ svfloat16x4_t test_svamax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = 
tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f32_x4)(zdn, zm); @@ -293,45 +261,35 @@ svfloat32x4_t test_svamax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], 
[[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svamax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], 
[[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamax,_f64_x4)(zdn, zm); @@ -339,45 +297,35 @@ svfloat64x4_t test_svamax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: 
@_Z18test_svamin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f16_x4)(zdn, zm); @@ -385,45 +333,35 @@ svfloat16x4_t test_svamin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] 
= tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svamin,_f32_x4)(zdn, zm); @@ -431,45 +369,35 @@ svfloat32x4_t test_svamin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_stre // CHECK-LABEL: @test_svamin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svamin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sme.famin.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , 
}, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
 //
 svfloat64x4_t test_svamin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svamin,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c
index 909bf657dec5c7..4338ea030b504f 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c
@@ -16,19 +16,15 @@
 #endif
 
 // CHECK-LABEL: define dso_local void @test_svmla_single_za16_f16_vg1x2(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void @_Z32test_svmla_single_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svmla_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -36,23 +32,15 @@ void test_svmla_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat1
 }
 
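A minimal usage sketch of the x2 tuple form exercised by these tests (assumes arm_sme.h on an SME2 target with FEAT_SME_F16F16; the wrapper name mla_pair is hypothetical, everything else is the ACLE spelling the tests use):

#include <arm_sme.h>

// Build an svfloat16x2_t from two single vectors and accumulate it into ZA
// slice 'slice'. With this patch the tuple is a struct of two
// <vscale x 8 x half> values, so it reaches the intrinsic as the two
// ZN_COERCE0/ZN_COERCE1 arguments seen in the checks, with no
// llvm.vector.extract shuffling in between.
static void mla_pair(uint32_t slice, svfloat16_t lo, svfloat16_t hi,
                     svfloat16_t zm) __arm_streaming __arm_inout("za") {
  svfloat16x2_t zn = svcreate2_f16(lo, hi);  // tuple create: now a plain insertvalue
  svmla_single_za16_f16_vg1x2(slice, zn, zm);
}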
// CHECK-LABEL: define dso_local void @test_svmla_single_za16_f16_vg1x4(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void @_Z32test_svmla_single_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svmla_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -60,19 +48,15 @@ void test_svmla_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat1
 }
 
 // CHECK-LABEL: define dso_local void @test_svmls_single_za16_f16_vg1x2(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
 // CHECK-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void @_Z32test_svmls_single_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmls_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -80,23 +64,15 @@ void test_svmls_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat1 } // CHECK-LABEL: define dso_local void @test_svmls_single_za16_f16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z32test_svmls_single_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmls_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -104,23 +80,15 @@ void test_svmls_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat1 } // CHECK-LABEL: define dso_local void @test_svmla_za16_f16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svmla_za16_f16_vg1x2j13svfloat16x2_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svmla_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { @@ -128,31 +96,15 @@ void test_svmla_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t z } // CHECK-LABEL: define dso_local void @test_svmla_za16_f16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // 
CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svmla_za16_f16_vg1x4j13svfloat16x4_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svmla_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") { @@ -160,23 +112,15 @@ void test_svmla_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t z } // CHECK-LABEL: define dso_local void @test_svmls_za16_f16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svmls_za16_f16_vg1x2j13svfloat16x2_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svmls_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") { @@ -184,31 +128,15 @@ void test_svmls_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t z } // CHECK-LABEL: define dso_local void @test_svmls_za16_f16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z25test_svmls_za16_f16_vg1x4j13svfloat16x4_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-CXX-NEXT: 
tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svmls_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") { @@ -216,19 +144,15 @@ void test_svmls_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t z } // CHECK-LABEL: define dso_local void @test_svmla_lane_za16_f16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z30test_svmla_lane_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7) // CHECK-CXX-NEXT: ret void // void test_svmla_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -236,23 +160,15 @@ void test_svmla_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_ } // CHECK-LABEL: define dso_local void @test_svmla_lane_za16_f16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7) 
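//
// Illustration (not from update_cc_test_checks): the trailing "i32 7" above is
// the lane immediate forwarded verbatim from a C call such as
//   svmla_lane_za16_f16_vg1x4(slice, zn, zm, 7);
// and the four ZN_COERCE operands are the members of the svfloat16x4_t
// struct, which no longer has to be carved out of one wide vector with
// llvm.vector.extract.
//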
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z30test_svmla_lane_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-CXX-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM]], i32 7)
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]], i32 7)
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svmla_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -260,19 +176,15 @@ void test_svmla_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_
 }
 
 // CHECK-LABEL: define dso_local void @test_svmls_lane_za16_f16_vg1x2(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM]], i32 7)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
 // CHECK-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void @_Z30test_svmls_lane_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 0)
-// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM]], i32 7)
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svmls_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -280,23 +192,15 @@ void test_svmls_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_
 }
 
 // CHECK-LABEL: define dso_local void @test_svmls_lane_za16_f16_vg1x4(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 32 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]],
[[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z30test_svmls_lane_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7) // CHECK-CXX-NEXT: ret void // void test_svmls_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -304,19 +208,15 @@ void test_svmls_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_ } // CHECK-LABEL: define dso_local void @test_svmla_single_za16_bf16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z33test_svmla_single_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmla_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -324,23 +224,15 @@ void test_svmla_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbflo } // CHECK-LABEL: define dso_local void @test_svmla_single_za16_bf16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z33test_svmla_single_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmla_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -348,19 +240,15 @@ void test_svmla_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbflo } // CHECK-LABEL: define dso_local void @test_svmls_single_za16_bf16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( 
[[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z33test_svmls_single_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmls_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -368,23 +256,15 @@ void test_svmls_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbflo } // CHECK-LABEL: define dso_local void @test_svmls_single_za16_bf16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z33test_svmls_single_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], 
[[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]]) // CHECK-CXX-NEXT: ret void // void test_svmls_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -392,23 +272,15 @@ void test_svmls_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbflo } // CHECK-LABEL: define dso_local void @test_svmla_za16_bf16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svmla_za16_bf16_vg1x2j14svbfloat16x2_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svmla_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { @@ -416,31 +288,15 @@ void test_svmla_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_ } // CHECK-LABEL: define dso_local void @test_svmla_za16_bf16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svmla_za16_bf16_vg1x4j14svbfloat16x4_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-CXX-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-CXX-NEXT: ret void // void test_svmla_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") { @@ -448,23 +304,15 @@ void test_svmla_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_ } // CHECK-LABEL: define dso_local void @test_svmls_za16_bf16_vg1x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svmls_za16_bf16_vg1x2j14svbfloat16x2_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0) -// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 0) -// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM_COERCE0]], [[ZM_COERCE1]]) // CHECK-CXX-NEXT: ret void // void test_svmls_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") { @@ -472,31 +320,15 @@ void test_svmls_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_ } // CHECK-LABEL: define dso_local void @test_svmls_za16_bf16_vg1x4( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]]) // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z26test_svmls_za16_bf16_vg1x4j14svbfloat16x4_tS_( -// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// 
CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-CXX-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 0)
-// CHECK-CXX-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8)
-// CHECK-CXX-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16)
-// CHECK-CXX-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM_COERCE0]], [[ZM_COERCE1]], [[ZM_COERCE2]], [[ZM_COERCE3]])
// CHECK-CXX-NEXT: ret void
//
void test_svmls_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -504,19 +336,15 @@ void test_svmls_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_bf16_vg1x2(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmla_lane_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7)
// CHECK-CXX-NEXT: ret void
//
void test_svmla_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -524,23 +352,15 @@ void test_svmla_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_bf16_vg1x4(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmla_lane_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7)
// CHECK-CXX-NEXT: ret void
//
void test_svmla_lane_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -548,19 +368,15 @@ void test_svmla_lane_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_bf16_vg1x2(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmls_lane_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[ZM]], i32 7)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZM]], i32 7)
// CHECK-CXX-NEXT: ret void
//
void test_svmls_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -568,23 +384,15 @@ void test_svmls_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_bf16_vg1x4(
-// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7)
// CHECK-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmls_lane_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
-// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 0)
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-CXX-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-CXX-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM]], i32 7)
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], [[ZN_COERCE0]], [[ZN_COERCE1]], [[ZN_COERCE2]], [[ZN_COERCE3]], [[ZM]], i32 7)
// CHECK-CXX-NEXT: ret void
//
void test_svmls_lane_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
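Every hunk above changes the same way: a tuple argument such as svbfloat16x2_t or svbfloat16x4_t now arrives as separate [[ZN_COERCE0]]..[[ZN_COERCE3]] parameters, so the vector.extract preamble that used to split one concatenated wide vector disappears. The C source of these tests is untouched; as a rough sketch of what such a caller looks like, assuming the usual <arm_sme.h> declarations and an SME2 target with the bf16 ZA extension, and with the wrapper name being illustrative rather than taken from the patch:

#include <arm_sme.h>

// The svbfloat16x2_t tuple reaches the callee as two separate scalable
// vectors after this patch; nothing changes in the source itself.
void mla_one_slice(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm)
    __arm_streaming __arm_inout("za") {
  svmla_lane_za16_bf16_vg1x2(slice, zn, zm, 7);
}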
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c
index 3392ba9235bfda..00cbfdbe7ca341 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fp_dots.c
@@ -19,20 +19,12 @@
// Multi, multi (half)
// CHECK-LABEL: @test_svdot_multi_za32_vg1x2_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x2_f16j13svfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_multi_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -41,28 +33,12 @@ void test_svdot_multi_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfl
// CHECK-LABEL: @test_svdot_multi_za32_vg1x4_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x4_f16j13svfloat16x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_multi_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -74,16 +50,12 @@ void test_svdot_multi_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfl
// Multi, single (half)
// CHECK-LABEL: @test_svdot_single_za32_vg1x2_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x2_f16j13svfloat16x2_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_single_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -92,20 +64,12 @@ void test_svdot_single_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svf
// CHECK-LABEL: @test_svdot_single_za32_vg1x4_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x4_f16j13svfloat16x4_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_single_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -117,16 +81,12 @@ void test_svdot_single_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svf
// Multi, indexed (half)
// CHECK-LABEL: @test_svdot_lane_za32_vg1x2_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x2_f16j13svfloat16x2_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -135,20 +95,12 @@ void test_svdot_lane_za32_vg1x2_f16(uint32_t slice_base, svfloat16x2_t zn, svflo
// CHECK-LABEL: @test_svdot_lane_za32_vg1x4_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x4_f16j13svfloat16x4_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -160,20 +112,12 @@ void test_svdot_lane_za32_vg1x4_f16(uint32_t slice_base, svfloat16x4_t zn, svflo
// Multi, multi (bfloat)
// CHECK-LABEL: @test_svdot_multi_za32_vg1x2_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svdot_multi_za32_vg1x2_bf16j14svbfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_multi_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -182,28 +126,12 @@ void test_svdot_multi_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, sv
// CHECK-LABEL: @test_svdot_multi_za32_vg1x4_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svdot_multi_za32_vg1x4_bf16j14svbfloat16x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_multi_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -215,16 +143,12 @@ void test_svdot_multi_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, sv
// Multi, single (bfloat)
// CHECK-LABEL: @test_svdot_single_za32_vg1x2_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svdot_single_za32_vg1x2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_single_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -233,20 +157,12 @@ void test_svdot_single_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, s
// CHECK-LABEL: @test_svdot_single_za32_vg1x4_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svdot_single_za32_vg1x4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.single.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svdot_single_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -258,16 +174,12 @@ void test_svdot_single_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, s
// Multi, indexed (bfloat)
// CHECK-LABEL: @test_svdot_lane_za32_vg1x2_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svdot_lane_za32_vg1x2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
@@ -276,20 +188,12 @@ void test_svdot_lane_za32_vg1x2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svb
// CHECK-LABEL: @test_svdot_lane_za32_vg1x4_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svdot_lane_za32_vg1x4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fdot.lane.za32.vg1x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
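The acle_sme2_frint.c hunks that follow show the return side of the same convention change: an svfloat32x2_t result is now returned as a two-element struct of <vscale x 4 x float> vectors instead of one concatenated <vscale x 8 x float>, and the new [[RETVAL]] alloca with its store/load pair is clang's generic aggregate-return scaffolding, which later optimizations are expected to fold away. Source-level use of the tuple-returning intrinsics is unchanged; a minimal sketch, assuming <arm_sme.h>, streaming mode, and the non-overloaded intrinsic spelling that these tests expand to:

#include <arm_sme.h>

// svrinta_f32_x2 rounds both vectors of the pair to integral values in one
// call; only its IR-level return type changes with this patch.
svfloat32x2_t round_pair(svfloat32x2_t zn) __arm_streaming {
  return svrinta_f32_x2(zn);
}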
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
index 216e44339d8ce6..abdb5a46d54532 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_frint.c
@@ -21,25 +21,27 @@
// CHECK-LABEL: @test_svfrinta_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frinta.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svrinta,_f32_x2)(zn);
@@ -47,37 +49,35 @@ svfloat32x2_t test_svfrinta_f32_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svfrinta_f32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3
-// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12)
-// CHECK-NEXT: ret [[TMP12]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z20test_svfrinta_f32_x413svfloat32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8)
-// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3
-// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP12]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frinta.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svrinta,_f32_x4)(zn);
@@ -87,25 +87,27 @@ svfloat32x4_t test_svfrinta_f32_x4(svfloat32x4_t zn) __arm_streaming {
// CHECK-LABEL: @test_svfrintam_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svfrintam_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintm.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svrintm,_f32_x2)(zn);
@@ -113,37 +115,35 @@ svfloat32x2_t test_svfrintam_f32_x2(svfloat32x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svfrintm_f32_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3
-// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12)
-// CHECK-NEXT: ret [[TMP12]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z20test_svfrintm_f32_x413svfloat32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8)
-// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3
-// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP12]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintm.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming {
return SVE_ACLE_FUNC(svrintm,_f32_x4)(zn);
@@ -153,25 +153,27 @@ svfloat32x4_t test_svfrintm_f32_x4(svfloat32x4_t zn) __arm_streaming {
// CHECK-LABEL: @test_svfrintn_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x213svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[TMP0]], [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.frintn.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svrintn,_f32_x2)(zn); @@ -179,37 +181,35 @@ svfloat32x2_t test_svfrintn_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svfrintn_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.frintn.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svfrintn_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
<vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
 //
 svfloat32x4_t test_svfrintn_f32_x4(svfloat32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svrintn,_f32_x4)(zn);
@@ -219,25 +219,27 @@
 // CHECK-LABEL: @test_svfrintp_f32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x213svfloat32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
 //
 svfloat32x2_t test_svfrintp_f32_x2(svfloat32x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svrintp,_f32_x2)(zn);
@@ -245,37 +247,35 @@
 // CHECK-LABEL: @test_svfrintp_f32_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP12]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svfrintp_f32_x413svfloat32x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP10]], <vscale x 4 x float> [[TMP11]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
 //
 svfloat32x4_t test_svfrintp_f32_x4(svfloat32x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svrintp,_f32_x4)(zn);
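The reconstructed test expectations above and below all show the same effect of the CGBuiltin.cpp change: a multi-vector tuple no longer lowers to one wide scalable vector that must be sliced with llvm.vector.extract/llvm.vector.insert around each intrinsic call; it is a struct of single vectors, passed as separate ..._COERCE0/1/2/3 arguments and returned through a struct alloca. The C sketch below is illustrative only and is not part of the patch; the intrinsic name follows the SME2 ACLE and the compile flags are an assumption standing in for the tests' RUN lines.

  /* tuple_dot.c -- assumed flags:
     clang --target=aarch64-linux-gnu -march=armv9-a+sme2 -O2 -S -emit-llvm tuple_dot.c */
  #include <arm_sme.h>

  /* With the struct-based lowering, zn and zm each arrive as two separate
     <vscale x 8 x i16> parameters (zn_coerce0/1, zm_coerce0/1), so this body
     compiles to a single call to @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16
     with no llvm.vector.extract preamble, matching the updated CHECK lines. */
  void dot_u16x2(uint32_t slice, svuint16x2_t zn, svuint16x2_t zm)
      __arm_streaming __arm_inout("za") {
    svdot_za32_u16_vg1x2(slice, zn, zm);
  }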
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c
index ab8b4fc4108c34..961133b89eb29c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_int_dots.c
@@ -19,20 +19,12 @@
 // Multi, multi (unsigned)
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x2_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x2_u16j12svuint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -41,28 +33,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x4_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x4_u16j12svuint16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -71,20 +47,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x2_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svdot_multi_za32_vg1x2_u8j11svuint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
@@ -93,28 +61,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x4_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svdot_multi_za32_vg1x4_u8j11svuint8x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") {
@@ -123,20 +75,12 @@
 // CHECK-LABEL: @test_svdot_multi_za64_vg1x2_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za64_vg1x2_u16j12svuint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -145,28 +89,12 @@
 // CHECK-LABEL: @test_svdot_multi_za64_vg1x4_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za64_vg1x4_u16j12svuint16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -178,20 +106,12 @@
 // Multi, multi (signed)
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x2_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x2_s16j11svint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -200,28 +120,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x4_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za32_vg1x4_s16j11svint16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -230,20 +134,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x2_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svdot_multi_za32_vg1x2_s8j10svint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
@@ -252,28 +148,12 @@
 // CHECK-LABEL: @test_svdot_multi_za32_vg1x4_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 32)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svdot_multi_za32_vg1x4_s8j10svint8x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") {
@@ -282,20 +162,12 @@
 // CHECK-LABEL: @test_svdot_multi_za64_vg1x2_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za64_vg1x2_s16j11svint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") {
@@ -304,28 +176,12 @@
 // CHECK-LABEL: @test_svdot_multi_za64_vg1x4_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_multi_za64_vg1x4_s16j11svint16x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_multi_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") {
@@ -337,16 +193,12 @@
 // Multi, single (unsigned)
 // CHECK-LABEL: @test_svdot_single_za32_vg1x2_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x2_u16j12svuint16x2_tu12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -355,20 +207,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x4_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x4_u16j12svuint16x4_tu12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -377,16 +221,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x2_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_single_za32_vg1x2_u8j11svuint8x2_tu11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
@@ -395,20 +235,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x4_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_single_za32_vg1x4_u8j11svuint8x4_tu11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
@@ -417,16 +249,12 @@
 // CHECK-LABEL: @test_svdot_single_za64_vg1x2_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za64_vg1x2_u16j12svuint16x2_tu12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -435,20 +263,12 @@
 // CHECK-LABEL: @test_svdot_single_za64_vg1x4_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za64_vg1x4_u16j12svuint16x4_tu12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.udot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -460,16 +280,12 @@
 // Multi, single (signed)
 // CHECK-LABEL: @test_svdot_single_za32_vg1x2_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x2_s16j11svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -478,20 +294,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x4_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za32_vg1x4_s16j11svint16x4_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -500,16 +308,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x2_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_single_za32_vg1x2_s8j10svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -518,20 +322,12 @@
 // CHECK-LABEL: @test_svdot_single_za32_vg1x4_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z31test_svdot_single_za32_vg1x4_s8j10svint8x4_tu10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -540,16 +336,12 @@
 // CHECK-LABEL: @test_svdot_single_za64_vg1x2_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za64_vg1x2_s16j11svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -558,20 +350,12 @@
 // CHECK-LABEL: @test_svdot_single_za64_vg1x4_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z32test_svdot_single_za64_vg1x4_s16j11svint16x4_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sdot.single.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svdot_single_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -582,16 +366,12 @@
 // Multi, indexed (unsigned)
 // CHECK-LABEL: @test_svdot_lane_za32_vg1x2_u16(
 // CHECK-NEXT:  entry:
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svdot_lane_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -600,20 +380,12 @@ void test_svdot_lane_za32_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint // CHECK-LABEL: @test_svdot_lane_za32_vg1x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svdot_lane_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -622,16 +394,12 @@ void test_svdot_lane_za32_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint // CHECK-LABEL: @test_svdot_lane_za32_vg1x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// 
CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svdot_lane_za32_vg1x2_u8j11svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -640,20 +408,12 @@ void test_svdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_ // CHECK-LABEL: @test_svdot_lane_za32_vg1x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svdot_lane_za32_vg1x4_u8j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -662,16 +422,12 @@ void test_svdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_ // CHECK-LABEL: @test_svdot_lane_za64_vg1x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: 
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za64_vg1x2_u16j12svuint16x2_tu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -680,20 +436,12 @@ void test_svdot_lane_za64_vg1x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint
// CHECK-LABEL: @test_svdot_lane_za64_vg1x4_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za64_vg1x4_u16j12svuint16x4_tu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.udot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") {
@@ -705,16 +453,12 @@ void test_svdot_lane_za64_vg1x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint
// Multi, indexed (signed)
// CHECK-LABEL: @test_svdot_lane_za32_vg1x2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x2_s16j11svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -723,20 +467,12 @@ void test_svdot_lane_za32_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16
// CHECK-LABEL: @test_svdot_lane_za32_vg1x4_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za32_vg1x4_s16j11svint16x4_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -745,16 +481,12 @@ void test_svdot_lane_za32_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16
// CHECK-LABEL: @test_svdot_lane_za32_vg1x2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z29test_svdot_lane_za32_vg1x2_s8j10svint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -763,20 +495,12 @@ void test_svdot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t
// CHECK-LABEL: @test_svdot_lane_za32_vg1x4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z29test_svdot_lane_za32_vg1x4_s8j10svint8x4_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -785,16 +509,12 @@ void test_svdot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t
// CHECK-LABEL: @test_svdot_lane_za64_vg1x2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za64_vg1x2_s16j11svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -803,20 +523,12 @@ void test_svdot_lane_za64_vg1x2_s16(uint32_t slice_base, svint16x2_t zn, svint16
// CHECK-LABEL: @test_svdot_lane_za64_vg1x4_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z30test_svdot_lane_za64_vg1x4_s16j11svint16x4_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") {
@@ -828,20 +540,12 @@ void test_svdot_lane_za64_vg1x4_s16(uint32_t slice_base, svint16x4_t zn, svint16
// Multi, multi (unsigned by signed)
// CHECK-LABEL: @test_svusdot_multi_za32_vg1x2_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svusdot_multi_za32_vg1x2_u8j11svuint8x2_t10svint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") {
@@ -850,28 +554,12 @@ void test_svusdot_multi_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint
// CHECK-LABEL: @test_svusdot_multi_za32_vg1x4_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svusdot_multi_za32_vg1x4_u8j11svuint8x4_t10svint8x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") {
@@ -883,16 +571,12 @@ void test_svusdot_multi_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint
// Multi, single (unsigned by signed)
// CHECK-LABEL: @test_svusdot_single_za32_vg1x2_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svusdot_single_za32_vg1x2_u8j11svuint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -901,20 +585,12 @@ void test_svusdot_single_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svin
// CHECK-LABEL: @test_svusdot_single_za32_vg1x4_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svusdot_single_za32_vg1x4_u8j11svuint8x4_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -925,16 +601,12 @@ void test_svusdot_single_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svin
// Multi, indexed (unsigned by signed)
// CHECK-LABEL: @test_svusdot_lane_za32_vg1x2_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svusdot_lane_za32_vg1x2_u8j11svuint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -943,20 +615,12 @@ void test_svusdot_lane_za32_vg1x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8
// CHECK-LABEL: @test_svusdot_lane_za32_vg1x4_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svusdot_lane_za32_vg1x4_u8j11svuint8x4_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svusdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") {
@@ -968,16 +632,12 @@ void test_svusdot_lane_za32_vg1x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8
// Multi, single (signed by unsigned)
// CHECK-LABEL: @test_svsudot_single_za32_vg1x2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svsudot_single_za32_vg1x2_s8j10svint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
@@ -986,20 +646,12 @@ void test_svsudot_single_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuin
// CHECK-LABEL: @test_svsudot_single_za32_vg1x4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z33test_svsudot_single_za32_vg1x4_s8j10svint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.single.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
@@ -1010,20 +662,12 @@ void test_svsudot_single_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuin
// Multi, multi (signed by unsigned)
// CHECK-LABEL: @test_svsudot_multi_za32_vg1x2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svsudot_multi_za32_vg1x2_s8j10svint8x2_t11svuint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") {
@@ -1032,28 +676,12 @@ void test_svsudot_multi_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint
// CHECK-LABEL: @test_svsudot_multi_za32_vg1x4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z32test_svsudot_multi_za32_vg1x4_s8j10svint8x4_t11svuint8x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") {
@@ -1064,16 +692,12 @@ void test_svsudot_multi_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint
// Multi, indexed (signed by unsigned)
// CHECK-LABEL: @test_svsudot_lane_za32_vg1x2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svsudot_lane_za32_vg1x2_s8j10svint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
@@ -1082,20 +706,12 @@ void test_svsudot_lane_za32_vg1x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8
// CHECK-LABEL: @test_svsudot_lane_za32_vg1x4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z31test_svsudot_lane_za32_vg1x4_s8j10svint8x4_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svsudot_lane_za32_vg1x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") {
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
index 4d340c6b8a6cd4..6dd55663d7d348 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
@@ -10,21 +10,27 @@
// CHECK-LABEL: @test_svluti2_lane_zt_u8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u8_x2(0, zn, 7);
@@ -33,21 +39,27 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0"
// CHECK-LABEL: @test_svluti2_lane_zt_s8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s8_x2(0, zn, 7);
@@ -55,21 +67,27 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0")
// CHECK-LABEL: @test_svluti2_lane_zt_u16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u16_x2(0, zn, 7);
@@ -78,21 +96,27 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: @test_svluti2_lane_zt_s16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s16_x2(0, zn, 7);
@@ -100,21 +124,27 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti2_lane_zt_f16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f16_x2(0, zn, 7);
@@ -122,21 +152,27 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CHECK-LABEL: @test_svluti2_lane_zt_bf16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_bf16_x2(0, zn, 7);
@@ -144,21 +180,27 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CHECK-LABEL: @test_svluti2_lane_zt_u32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u32_x2(0, zn, 7);
@@ -166,21 +208,27 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: @test_svluti2_lane_zt_s32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s32_x2(0, zn, 7);
@@ -188,21 +236,27 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti2_lane_zt_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti2.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f32_x2(0, zn, 7);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
index 7fa74c1cb362a6..8650ec7f62dd83 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
@@ -10,6 +10,7 @@
// CHECK-LABEL: @test_svluti2_lane_zt_u8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
@@ -19,10 +20,13 @@
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_u8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
@@ -32,7 +36,9 @@
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u8_x4(0, zn, 3);
@@ -41,6 +47,7 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0"
// CHECK-LABEL: @test_svluti2_lane_zt_s8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
@@ -50,10 +57,13 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0"
__arm_in("zt0" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svluti2_lane_zt_s8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv16i8(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -63,7 +73,9 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0" // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s8_x4(0, zn, 3); @@ -71,6 +83,7 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-LABEL: @test_svluti2_lane_zt_u16( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -80,10 +93,13 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -93,7 +109,9 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u16_x4(0, zn, 3); @@ -101,6 +119,7 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s16( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -110,10 +129,13 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8i16(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -123,7 +145,9 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_s16_x4(0, zn, 3); @@ -131,6 +155,7 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-LABEL: @test_svluti2_lane_zt_f16( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -140,10 +165,13 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( 
[[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8f16(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -153,7 +181,9 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_f16_x4(0, zn, 3); @@ -161,6 +191,7 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svluti2_lane_zt_bf16( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -170,10 +201,13 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svluti2_lane_zt_bf16u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv8bf16(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -183,7 +217,9 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// 
CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_bf16_x4(0, zn, 3); @@ -191,6 +227,7 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-LABEL: @test_svluti2_lane_zt_u32( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -200,10 +237,13 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_u32u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -213,7 +253,9 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") { return svluti2_lane_zt_u32_x4(0, zn, 3); @@ -221,6 +263,7 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-LABEL: @test_svluti2_lane_zt_s32( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -230,10 +273,13 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // 
CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_s32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
@@ -243,7 +289,9 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s32_x4(0, zn, 3);
@@ -251,6 +299,7 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti2_lane_zt_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
@@ -260,10 +309,13 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti2_lane_zt_f32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti2.lane.zt.x4.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
@@ -273,7 +325,9 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f32_x4(0, zn, 3);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
index f223a5dcdcefa7..f4f11c9fc5b143 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
@@ -10,21 +10,27 @@
// CHECK-LABEL: @test_svluti4_lane_zt_u8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_u8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u8_x2(0, zn, 3);
@@ -33,21 +39,27 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0"
// CHECK-LABEL: @test_svluti4_lane_zt_s8(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svluti4_lane_zt_s8u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv16i8(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s8_x2(0, zn, 3);
@@ -55,21 +67,27 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0")
// CHECK-LABEL: @test_svluti4_lane_zt_u16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u16_x2(0, zn, 3);
@@ -78,21 +96,27 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: @test_svluti4_lane_zt_s16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8i16(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s16_x2(0, zn, 3);
@@ -100,21 +124,27 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti4_lane_zt_f16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8f16(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f16_x2(0, zn, 3);
@@ -122,21 +152,27 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CHECK-LABEL: @test_svluti4_lane_zt_bf16(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv8bf16(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_bf16_x2(0, zn, 3);
@@ -144,21 +180,27 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CHECK-LABEL: @test_svluti4_lane_zt_u32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u32_x2(0, zn, 3);
@@ -166,21 +208,27 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-LABEL: @test_svluti4_lane_zt_s32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4i32(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s32_x2(0, zn, 3);
@@ -188,21 +236,27 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-LABEL: @test_svluti4_lane_zt_f32(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CHECK-NEXT: ret [[TMP4]]
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.luti4.lane.zt.x2.nxv4f32(i32 0, [[ZN:%.*]], i32 3)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP4]]
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f32_x2(0, zn, 3);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
index 89e17e8d585882..16a74213262357 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
@@ -8,9 +8,10 @@
#include
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_u16
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
@@ -20,11 +21,14 @@
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT:
store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
@@ -34,15 +38,18 @@
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u16_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_f16
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
@@ -52,11 +59,14 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8f16(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0)
@@ -66,15 +76,18 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f16_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_bf16
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_bf16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
@@ -84,11 +97,14 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svluti4_lane_zt_bf16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8bf16(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0)
@@ -98,15 +114,18 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_bf16_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_s16
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s16
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
@@ -116,11 +135,14 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s16u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv8i16(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0)
@@ -130,15 +152,18 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s16_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_u32
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_u32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
@@ -148,11 +173,14 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_u32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
@@ -162,15 +190,18 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u32_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_s32
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_s32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
@@ -180,11 +211,14 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_s32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4i32(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0)
@@ -194,15 +228,18 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s32_x4(0, zn, 1);
}
-// CHECK-LABEL: define dso_local @test_svluti4_lane_zt_f32
+// CHECK-LABEL: define dso_local { , , , } @test_svluti4_lane_zt_f32
// CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
@@ -212,11 +249,14 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , , , } [[TMP9]]
//
-// CPP-CHECK-LABEL: define dso_local @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t
+// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svluti4_lane_zt_f32u11__SVUint8_t
// CPP-CHECK-SAME: ( [[ZN:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, [[ZN]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0)
@@ -226,7 +266,9 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret [[TMP8]]
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f32_x4(0, zn, 1);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
index bd8d57e3523312..efc68c0b42334c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
@@ -18,25 +18,27 @@
// CHECK-LABEL: @test_svmax_single_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8(
[[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x210svint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
return SVE_ACLE_FUNC(svmax,_single_s8_x2)(zdn, zm);
@@ -44,25 +46,27 @@ svint8x2_t test_svmax_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming
// CHECK-LABEL: @test_svmax_single_s16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x211svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming {
return SVE_ACLE_FUNC(svmax,_single_s16_x2)(zdn, zm);
@@ -70,25 +74,27 @@ svint16x2_t test_svmax_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_s32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x211svint32x2_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
return SVE_ACLE_FUNC(svmax,_single_s32_x2)(zdn, zm);
@@ -96,25 +102,27 @@ svint32x2_t test_svmax_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_s64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x211svint64x2_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
return SVE_ACLE_FUNC(svmax,_single_s64_x2)(zdn, zm);
@@ -122,25 +130,27 @@ svint64x2_t test_svmax_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream
// CHECK-LABEL: @test_svmax_single_u8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x211svuint8x2_tu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming {
return SVE_ACLE_FUNC(svmax,_single_u8_x2)(zdn, zm);
@@ -148,25 +158,27 @@ svuint8x2_t test_svmax_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami
// CHECK-LABEL: @test_svmax_single_u16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CHECK-NEXT: ret [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x212svuint16x2_tu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u16_x2)(zdn, zm); @@ -174,25 +186,27 @@ svuint16x2_t test_svmax_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u32_x2)(zdn, zm); @@ -200,25 +214,27 @@ svuint32x2_t test_svmax_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) 
__arm_str // CHECK-LABEL: @test_svmax_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm); @@ -226,25 +242,27 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmax_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm); @@ -252,25 +270,27 @@ svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmax_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f16_x2)(zdn, zm); @@ -278,25 +298,27 @@ svfloat16x2_t test_svmax_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( 
[[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f32_x2)(zdn, zm); @@ -304,25 +326,27 @@ svfloat32x2_t test_svmax_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_f64_x2)(zdn, zm); @@ -332,37 +356,35 @@ svfloat64x2_t test_svmax_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmax_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], 
[[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return 
SVE_ACLE_FUNC(svmax,_single_s8_x4)(zdn, zm); @@ -370,37 +392,35 @@ svint8x4_t test_svmax_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmax_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s16_x4)(zdn, zm); @@ -408,37 +428,35 @@ svint16x4_t test_svmax_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s32_x4)(zdn, zm); @@ -446,37 +464,35 @@ svint32x4_t test_svmax_single_s32_x4(svint32x4_t zdn, svint32_t zm) 
__arm_stream // CHECK-LABEL: @test_svmax_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_s64_x4)(zdn, zm); @@ -484,37 +500,35 @@ svint64x4_t test_svmax_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmax_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svmax_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_single_u8_x4)(zdn, zm); @@ -522,37 +536,35 @@ svuint8x4_t test_svmax_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmax_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmax_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call 
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.umax.single.x4.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZDN_COERCE2:%.*]], <vscale x 8 x i16> [[ZDN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    store <vscale x 32 x i16> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]]
 //
 svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_u16_x4)(zdn, zm);
@@ -560,37 +572,35 @@ svuint16x4_t test_svmax_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str
 // CHECK-LABEL: @test_svmax_single_u32_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated exactly as in test_svmax_single_u16_x4 above, here for @llvm.aarch64.sve.umax.single.x4.nxv4i32 over <vscale x 4 x i32> quarters of <vscale x 16 x i32> at offsets 0/4/8/12]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_u32_x412svuint32x4_tu12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_u32_x4)(zdn, zm);
@@ -598,37 +608,35 @@ svuint32x4_t test_svmax_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str
 // CHECK-LABEL: @test_svmax_single_u64_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_single_u16_x4, for @llvm.aarch64.sve.umax.single.x4.nxv2i64 over <vscale x 2 x i64> quarters of <vscale x 8 x i64> at offsets 0/2/4/6]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_u64_x412svuint64x4_tu12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm);
@@ -636,37 +644,35 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str
 // CHECK-LABEL: @test_svmax_single_bf16_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_single_u16_x4, for @llvm.aarch64.sve.fmax.single.x4.nxv8bf16 over <vscale x 8 x bfloat> quarters of <vscale x 32 x bfloat> at offsets 0/8/16/24]
 //
 // CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm);
@@ -674,37 +680,35 @@ svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __
 // CHECK-LABEL: @test_svmax_single_f16_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_single_u16_x4, for @llvm.aarch64.sve.fmax.single.x4.nxv8f16 over <vscale x 8 x half> quarters of <vscale x 32 x half> at offsets 0/8/16/24]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f16_x413svfloat16x4_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f16_x4)(zdn, zm);
@@ -712,37 +716,35 @@ svfloat16x4_t test_svmax_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_
 // CHECK-LABEL: @test_svmax_single_f32_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_single_u16_x4, for @llvm.aarch64.sve.fmax.single.x4.nxv4f32 over <vscale x 4 x float> quarters of <vscale x 16 x float> at offsets 0/4/8/12]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f32_x413svfloat32x4_tu13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f32_x4)(zdn, zm);
@@ -750,37 +752,35 @@ svfloat32x4_t test_svmax_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_
 // CHECK-LABEL: @test_svmax_single_f64_x4(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_single_u16_x4, for @llvm.aarch64.sve.fmax.single.x4.nxv2f64 over <vscale x 2 x double> quarters of <vscale x 8 x double> at offsets 0/2/4/6]
 //
 // CPP-CHECK-LABEL: @_Z24test_svmax_single_f64_x413svfloat64x4_tu13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_single_f64_x4)(zdn, zm);
@@ -790,29 +790,27 @@ svfloat64x4_t test_svmax_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_
 // CHECK-LABEL: @test_svmax_s8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP8]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smax.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x210svint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated exactly as the CHECK body above]
 //
 svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s8_x2)(zdn, zm);
@@ -820,29 +818,27 @@ svint8x2_t test_svmax_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s16_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.smax.x2.nxv8i16 over <vscale x 8 x i16> halves of <vscale x 16 x i16> at offsets 0/8]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x211svint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s16_x2)(zdn, zm);
@@ -850,29 +846,27 @@ svint16x2_t test_svmax_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s32_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.smax.x2.nxv4i32 over <vscale x 4 x i32> halves of <vscale x 8 x i32> at offsets 0/4]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x211svint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s32_x2)(zdn, zm);
@@ -880,29 +874,27 @@ svint32x2_t test_svmax_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_s64_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.smax.x2.nxv2i64 over <vscale x 2 x i64> halves of <vscale x 4 x i64> at offsets 0/2]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x211svint64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_s64_x2)(zdn, zm);
@@ -910,29 +902,27 @@ svint64x2_t test_svmax_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_u8_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.umax.x2.nxv16i8 over <vscale x 16 x i8> halves of <vscale x 32 x i8> at offsets 0/16]
 //
 // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x211svuint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u8_x2)(zdn, zm);
@@ -940,29 +930,27 @@ svuint8x2_t test_svmax_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
 // CHECK-LABEL: @test_svmax_u16_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.umax.x2.nxv8i16 over <vscale x 8 x i16> halves of <vscale x 16 x i16> at offsets 0/8]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x212svuint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u16_x2)(zdn, zm);
@@ -970,29 +958,27 @@ svuint16x2_t test_svmax_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_u32_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.umax.x2.nxv4i32 over <vscale x 4 x i32> halves of <vscale x 8 x i32> at offsets 0/4]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x212svuint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u32_x2)(zdn, zm);
@@ -1000,29 +986,27 @@ svuint32x2_t test_svmax_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_u64_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.umax.x2.nxv2i64 over <vscale x 2 x i64> halves of <vscale x 4 x i64> at offsets 0/2]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x212svuint64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm);
@@ -1030,29 +1014,27 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin
 // CHECK-LABEL: @test_svmax_bf16_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.fmax.x2.nxv8bf16 over <vscale x 8 x bfloat> halves of <vscale x 16 x bfloat> at offsets 0/8]
 //
 // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm);
@@ -1060,29 +1042,27 @@ svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s
 // CHECK-LABEL: @test_svmax_f16_x2(
 // CHECK-NEXT:  entry:
[-/+: CHECK body regenerated as in test_svmax_s8_x2, for @llvm.aarch64.sve.fmax.x2.nxv8f16 over <vscale x 8 x half> halves of <vscale x 16 x half> at offsets 0/8]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
[-/+: CPP-CHECK body regenerated the same way]
 //
 svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm);
@@ -1090,29 +1070,27 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea
CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x2)(zdn, zm); @@ -1090,29 +1070,27 @@ svfloat16x2_t test_svmax_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( 
[[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x2)(zdn, zm); @@ -1120,29 +1098,27 @@ svfloat32x2_t test_svmax_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f64_x2)(zdn, zm); @@ -1152,45 +1128,35 @@ svfloat64x2_t test_svmax_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea // CHECK-LABEL: @test_svmax_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svmax_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.smax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s8_x4)(zdn, zm); @@ -1198,45 +1164,35 @@ svint8x4_t test_svmax_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// 
CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s16_x4)(zdn, zm); @@ -1244,45 +1200,35 @@ svint16x4_t test_svmax_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s32_x4)(zdn, zm); @@ -1290,45 +1236,35 @@ svint32x4_t test_svmax_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail 
call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_s64_x4)(zdn, zm); @@ -1336,45 +1272,35 @@ svint64x4_t test_svmax_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svmax_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], 
[[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u8_x4)(zdn, zm); @@ -1382,45 +1308,35 @@ svuint8x4_t test_svmax_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmax_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) 
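Every one of these updated CHECK blocks follows the same recipe: the intrinsic now returns a literal struct, each member is read back with extractvalue, and the members are concatenated into one wide scalable vector via llvm.vector.insert before being returned through the RETVAL slot. A minimal sketch of that repacking step, written against LLVM's IRBuilder API; the helper name and scaffolding are hypothetical, not code from this patch:

#include "llvm/IR/IRBuilder.h"

// Concatenate the N members of a struct-of-scalable-vectors value into one
// wide scalable vector, mirroring the extractvalue + llvm.vector.insert
// chain in the checks above. All members must share the same vector type.
static llvm::Value *concatTuple(llvm::IRBuilderBase &B, llvm::Value *Tuple) {
  auto *STy = llvm::cast<llvm::StructType>(Tuple->getType());
  auto *VTy = llvm::cast<llvm::ScalableVectorType>(STy->getElementType(0));
  unsigned N = STy->getNumElements();
  auto *WideTy = llvm::VectorType::get(VTy->getElementType(),
                                       VTy->getElementCount() * N);
  llvm::Value *Wide = llvm::PoisonValue::get(WideTy);
  for (unsigned I = 0; I < N; ++I) {
    llvm::Value *Part = B.CreateExtractValue(Tuple, I);
    llvm::Value *Idx =
        llvm::ConstantInt::get(B.getInt64Ty(), I * VTy->getMinNumElements());
    Wide = B.CreateInsertVector(WideTy, Wide, Part, Idx);
  }
  return Wide;
}

For test_svmax_u16_x4 this yields exactly the shape checked here: four extractvalue/llvm.vector.insert pairs assembling a <vscale x 32 x i16> value from four <vscale x 8 x i16> members.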
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , 
} [[TMP9]] // svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u16_x4)(zdn, zm); @@ -1428,45 +1344,35 @@ svuint16x4_t test_svmax_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u32_x4)(zdn, zm); @@ -1474,45 +1380,35 @@ svuint32x4_t test_svmax_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm); @@ -1520,45 +1416,35 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmax_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: 
[[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_bf16_x4)(zdn, zm); @@ -1566,45 +1452,35 @@ svbfloat16x4_t test_svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmax_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t 
zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f16_x4)(zdn, zm); @@ -1612,45 +1488,35 @@ svfloat16x4_t test_svmax_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], 
i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmax,_f32_x4)(zdn, zm); @@ -1658,45 +1524,35 @@ svfloat32x4_t test_svmax_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmax_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: 
[[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmax_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// 
CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 2
-// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP12]], <vscale x 2 x double> [[TMP13]], i64 4)
-// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP8]], 3
-// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP14]], <vscale x 2 x double> [[TMP15]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 8 x double> [[TMP16]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmax.x4.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZDN_COERCE2:%.*]], <vscale x 2 x double> [[ZDN_COERCE3:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE2:%.*]], <vscale x 2 x double> [[ZM_COERCE3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT: store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
//
svfloat64x4_t test_svmax_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmax,_f64_x4)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
index 07659932bef0a8..5d06895497cc7e 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
@@ -19,25 +19,27 @@
// CHECK-LABEL: @test_svmaxnm_single_bf16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x2)(zdn, zm);
@@ -45,25 +47,27 @@ svbfloat16x2_t test_svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
// CHECK-LABEL: @test_svmaxnm_single_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x half> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmaxnm,_single_f16_x2)(zdn, zm);
@@ -71,25 +75,27 @@ svfloat16x2_t test_svmaxnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar
// CHECK-LABEL: @test_svmaxnm_single_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmaxnm,_single_f32_x2)(zdn, zm);
@@ -97,25 +103,27 @@ svfloat32x2_t test_svmaxnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar
// CHECK-LABEL: @test_svmaxnm_single_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x double> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
//
svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmaxnm,_single_f64_x2)(zdn, zm);
@@ -125,37 +133,35 @@ svfloat64x2_t test_svmaxnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar
// CHECK-LABEL: @test_svmaxnm_single_bf16_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3
-// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24)
-// CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP12]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
+// CHECK-NEXT: store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z27test_svmaxnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT:
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_bf16_x4)(zdn, zm); @@ -163,37 +169,35 @@ svbfloat16x4_t test_svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svmaxnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
[[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f16_x4)(zdn, zm); @@ -201,37 +205,35 @@ svfloat16x4_t test_svmaxnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( 
[[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f32_x4)(zdn, zm); @@ -239,37 +241,35 @@ svfloat32x4_t test_svmaxnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svmaxnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_single_f64_x4)(zdn, zm); @@ -279,29 +279,27 @@ svfloat64x4_t test_svmaxnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svmaxnm_multi_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 
16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x2)(zdn, zm); @@ -309,29 +307,27 @@ svbfloat16x2_t test_svmaxnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x2)(zdn, zm); @@ -339,29 +335,27 @@ svfloat16x2_t test_svmaxnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x2)(zdn, zm); @@ -369,29 +363,27 @@ svfloat32x2_t test_svmaxnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x213svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x2)(zdn, zm); @@ -401,45 +393,35 @@ svfloat64x2_t test_svmaxnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], 
i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svmaxnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_bf16_x4)(zdn, zm); @@ -447,45 +429,35 @@ svbfloat16x4_t test_svmaxnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svmaxnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f16_x4)(zdn, zm); @@ -493,45 +465,35 @@ svfloat16x4_t test_svmaxnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: 
[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f32_x4)(zdn, zm); @@ -539,45 +501,35 @@ svfloat32x4_t test_svmaxnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svmaxnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: 
[[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svmaxnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svmaxnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmaxnm,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c index fe7b74c0052476..2fa7feeee404e6 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c @@ -18,25 +18,27 @@ // CHECK-LABEL: @test_svmin_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x2)(zdn, zm); @@ -44,25 +46,27 @@ svint8x2_t test_svmin_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = 
alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x2)(zdn, zm); @@ -70,25 +74,27 @@ svint16x2_t test_svmin_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], 
i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x2)(zdn, zm); @@ -96,25 +102,27 @@ svint32x2_t test_svmin_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: 
ret { , } [[TMP5]] // svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x2)(zdn, zm); @@ -122,25 +130,27 @@ svint64x2_t test_svmin_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x211svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x2)(zdn, zm); @@ -148,25 +158,27 @@ svuint8x2_t test_svmin_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x2( // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x212svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x2)(zdn, zm); @@ -174,25 +186,27 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// 
@@ -174,25 +186,27 @@ svuint16x2_t test_svmin_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_str
// CHECK-LABEL: @test_svmin_single_u32_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x212svuint32x2_tu12__SVUint32_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.umin.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
//
svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svmin,_single_u32_x2)(zdn, zm);
@@ -200,25 +214,27 @@ svuint32x2_t test_svmin_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_str
// CHECK-LABEL: @test_svmin_single_u64_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.umin.single.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
-// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x2)(zdn, zm); @@ -226,25 +242,27 @@ svuint64x2_t test_svmin_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x2)(zdn, zm); @@ -252,25 +270,27 @@ svbfloat16x2_t test_svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], 
ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x213svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x2)(zdn, zm); @@ -278,25 +298,27 @@ svfloat16x2_t test_svmin_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x2)(zdn, zm); @@ -304,25 +326,27 @@ svfloat32x2_t test_svmin_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x2)(zdn, zm); @@ -332,37 +356,35 @@ svfloat64x2_t test_svmin_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s8_x4)(zdn, zm); @@ -370,37 +392,35 @@ svint8x4_t test_svmin_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svmin_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], 
[[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s16_x4)(zdn, zm); @@ -408,37 +428,35 @@ svint16x4_t test_svmin_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// 
CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s32_x4)(zdn, zm); @@ -446,37 +464,35 @@ svint32x4_t test_svmin_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail 
call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 
0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_s64_x4)(zdn, zm); @@ -484,37 +500,35 @@ svint64x4_t test_svmin_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_stream // CHECK-LABEL: @test_svmin_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svmin_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u8_x4)(zdn, zm); @@ -522,37 +536,35 @@ svuint8x4_t test_svmin_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streami // CHECK-LABEL: @test_svmin_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u16_x4)(zdn, zm); @@ -560,37 +572,35 @@ svuint16x4_t test_svmin_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u32_x4)(zdn, zm); @@ -598,37 +608,35 @@ svuint32x4_t test_svmin_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } 
[[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_u64_x4)(zdn, zm); @@ -636,37 +644,35 @@ svuint64x4_t test_svmin_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str // CHECK-LABEL: @test_svmin_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svmin_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], 
i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_bf16_x4)(zdn, zm); @@ -674,37 +680,35 @@ svbfloat16x4_t test_svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __ // CHECK-LABEL: @test_svmin_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], 
[[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f16_x4)(zdn, zm); @@ -712,37 +716,35 @@ svfloat16x4_t test_svmin_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f32_x4)(zdn, zm); @@ -750,37 +752,35 @@ svfloat32x4_t test_svmin_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_ // CHECK-LABEL: @test_svmin_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svmin_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_single_f64_x4)(zdn, zm); @@ -790,29 +790,27 @@ svfloat64x4_t test_svmin_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_ // CHECK-LABEL: @test_svmin_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } 
[[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x2)(zdn, zm); @@ -820,29 +818,27 @@ svint8x2_t test_svmin_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, 
[[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x2)(zdn, zm); @@ -850,29 +846,27 @@ svint16x2_t test_svmin_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x2)(zdn, zm); @@ -880,29 +874,27 @@ svint32x2_t test_svmin_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x2)(zdn, zm); @@ -910,29 +902,27 @@ svint64x2_t test_svmin_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x211svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x2)(zdn, zm); @@ -940,29 +930,27 @@ svuint8x2_t test_svmin_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x212svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x2)(zdn, zm); @@ -970,29 +958,27 @@ svuint16x2_t test_svmin_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x2)(zdn, zm); @@ -1000,29 +986,27 @@ svuint32x2_t test_svmin_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x2)(zdn, zm); @@ -1030,29 +1014,27 @@ svuint64x2_t test_svmin_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x214svbfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 
0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x2)(zdn, zm); @@ -1060,29 +1042,27 @@ svbfloat16x2_t test_svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x213svfloat16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x2)(zdn, zm); @@ -1090,29 +1070,27 @@ svfloat16x2_t test_svmin_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x213svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], 
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmin.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
 //
 svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f32_x2)(zdn, zm);
@@ -1120,29 +1098,27 @@ svfloat32x2_t test_svmin_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_f64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x213svfloat64x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmin.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
 //
 svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svmin,_f64_x2)(zdn, zm);
@@ -1152,45 +1128,35 @@ svfloat64x2_t test_svmin_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svmin_s8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZDN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZDN]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZDN]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZM]], i64 48)
-// CHECK-NEXT: [[TMP8:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 0
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP9]], i64 0)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 1
-// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]], i64 16)
-// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 2
-// CHECK-NEXT: [[TMP14:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], i64 32)
-// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 3
-// CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP16]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.smin.x4.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZDN_COERCE2:%.*]], <vscale x 16 x i8> [[ZDN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svmin_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s8_x4)(zdn, zm); @@ -1198,45 +1164,35 @@ svint8x4_t test_svmin_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s16_x4)(zdn, zm); @@ -1244,45 +1200,35 @@ svint16x4_t test_svmin_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], 
[[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s32_x4)(zdn, zm); @@ -1290,45 +1236,35 @@ svint32x4_t test_svmin_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], 
[[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], 
[[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_s64_x4)(zdn, zm); @@ -1336,45 +1272,35 @@ svint64x4_t test_svmin_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svmin_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] 
= load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u8_x4)(zdn, zm); @@ -1382,45 +1308,35 @@ svuint8x4_t test_svmin_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svmin_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u16_x4)(zdn, zm); @@ -1428,45 +1344,35 @@ svuint16x4_t test_svmin_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u32_x4)(zdn, zm); @@ -1474,45 +1380,35 @@ svuint32x4_t test_svmin_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// 
CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } 
[[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_u64_x4)(zdn, zm); @@ -1520,45 +1416,35 @@ svuint64x4_t test_svmin_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin // CHECK-LABEL: @test_svmin_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svmin_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // 
svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_bf16_x4)(zdn, zm); @@ -1566,45 +1452,35 @@ svbfloat16x4_t test_svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_s // CHECK-LABEL: @test_svmin_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f16_x4)(zdn, zm); @@ -1612,45 +1488,35 @@ svfloat16x4_t test_svmin_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// 
CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f32_x4)(zdn, zm); @@ -1658,45 +1524,35 @@ svfloat32x4_t test_svmin_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_strea // CHECK-LABEL: @test_svmin_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// 
CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svmin_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svmin_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svmin,_f64_x4)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c index 3b221c030eddf9..71b8914b816cad 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c @@ -19,25 +19,27 @@ // CHECK-LABEL: @test_svminnm_single_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// 
CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_single_bf16_x2)(zdn, zm);
@@ -45,25 +47,27 @@ svbfloat16x2_t test_svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
// CHECK-LABEL: @test_svminnm_single_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x half> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x213svfloat16x2_tu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT:
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x2)(zdn, zm); @@ -71,25 +75,27 @@ svfloat16x2_t test_svminnm_single_f16_x2(svfloat16x2_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr 
[[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x2)(zdn, zm); @@ -97,25 +103,27 @@ svfloat32x2_t test_svminnm_single_f32_x2(svfloat32x2_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x213svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x2)(zdn, zm); @@ -125,37 +133,35 @@ 
svfloat64x2_t test_svminnm_single_f64_x2(svfloat64x2_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_single_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z27test_svminnm_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_bf16_x4)(zdn, zm); @@ -163,37 +169,35 @@ svbfloat16x4_t test_svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) // CHECK-LABEL: @test_svminnm_single_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f16_x413svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f16_x4)(zdn, zm); @@ -201,37 +205,35 @@ 
svfloat16x4_t test_svminnm_single_f16_x4(svfloat16x4_t zdn, svfloat16_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f32_x413svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f32_x4)(zdn, zm); @@ -239,37 +241,35 @@ svfloat32x4_t test_svminnm_single_f32_x4(svfloat32x4_t zdn, svfloat32_t zm) __ar // CHECK-LABEL: @test_svminnm_single_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_single_f64_x413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_single_f64_x4)(zdn, zm); @@ -279,29 +279,27 @@ svfloat64x4_t test_svminnm_single_f64_x4(svfloat64x4_t zdn, svfloat64_t zm) __ar // CHECK-LABEL: @test_svminnm_multi_bf16_x2( // 
CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x214svbfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZDN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZDN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x bfloat> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
//
svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_bf16_x2)(zdn, zm);
@@ -309,29 +307,27 @@ svbfloat16x2_t test_svminnm_multi_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
// CHECK-LABEL: @test_svminnm_multi_f16_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x213svfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fminnm.x2.nxv8f16(<vscale x 8 x half> [[ZDN_COERCE0:%.*]], <vscale x 8 x half> [[ZDN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x half> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x half>, <vscale x 8 x half> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP5]]
//
svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_f16_x2)(zdn, zm);
@@ -339,29 +335,27 @@ svfloat16x2_t test_svminnm_multi_f16_x2(svfloat16x2_t zdn, svfloat16x2_t zm) __a
// CHECK-LABEL: @test_svminnm_multi_f32_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x213svfloat32x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fminnm.x2.nxv4f32(<vscale x 4 x float> [[ZDN_COERCE0:%.*]], <vscale x 4 x float> [[ZDN_COERCE1:%.*]], <vscale x 4 x float> [[ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT: store <vscale x 8 x float> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP5]]
//
svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_f32_x2)(zdn, zm);
@@ -369,29 +363,27 @@ svfloat32x2_t test_svminnm_multi_f32_x2(svfloat32x2_t zdn, svfloat32x2_t zm) __a
// CHECK-LABEL: @test_svminnm_multi_f64_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT: store <vscale x 4 x double> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x213svfloat64x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZM]], i64 2)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fminnm.x2.nxv2f64(<vscale x 2 x double> [[ZDN_COERCE0:%.*]], <vscale x 2 x double> [[ZDN_COERCE1:%.*]], <vscale x 2 x double> [[ZM_COERCE0:%.*]], <vscale x 2 x double> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call
@llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f64_x2)(zdn, zm); @@ -401,45 +393,35 @@ svfloat64x2_t test_svminnm_multi_f64_x2(svfloat64x2_t zdn, svfloat64x2_t zm) __a // CHECK-LABEL: @test_svminnm_multi_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svminnm_multi_bf16_x414svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_bf16_x4)(zdn, zm); @@ -447,45 +429,35 @@ svbfloat16x4_t test_svminnm_multi_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svminnm_multi_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f16_x413svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f16_x4)(zdn, zm); @@ -493,45 +465,35 @@ svfloat16x4_t test_svminnm_multi_f16_x4(svfloat16x4_t zdn, svfloat16x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// 
CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f32_x413svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret 
[[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svminnm,_f32_x4)(zdn, zm); @@ -539,45 +501,35 @@ svfloat32x4_t test_svminnm_multi_f32_x4(svfloat32x4_t zdn, svfloat32x4_t zm) __a // CHECK-LABEL: @test_svminnm_multi_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svminnm_multi_f64_x413svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6)
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svfloat64x4_t test_svminnm_multi_f64_x4(svfloat64x4_t zdn, svfloat64x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svminnm,_f64_x4)(zdn, zm);
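The acle_sme2_mla.c updates that follow show the same shape as the svminnm tests above: each tuple argument now arrives as its individual coerced parts ([[ZN_COERCE0]], [[ZN_COERCE1]], ...) and is passed straight through to the SME intrinsic, so the llvm.vector.extract preamble disappears and only the ZA-accumulating call remains. As a minimal source-level sketch of what these tests exercise (illustrative only, not part of the patch; it assumes an SME2-enabled toolchain and the ACLE spelling svmla_za32_f32_vg1x2):

#include <arm_sme.h>

// Multiply-accumulate two 2-vector tuples into ZA at slice_base.
// Under the struct-based tuple lowering, zn and zm are each passed as
// two independent <vscale x 4 x float> parts, which CodeGen forwards
// unchanged to @llvm.aarch64.sme.fmla.vg1x2.nxv4f32.
void mla2_f32_example(uint32_t slice_base, svfloat32x2_t zn,
                      svfloat32x2_t zm) __arm_streaming __arm_inout("za") {
  svmla_za32_f32_vg1x2(slice_base, zn, zm);
}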
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
index cd8adf4a81ce71..74859c0a23bbba 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
@@ -19,20 +19,12 @@
// Multi, multi
// CHECK-LABEL: @test_svmla2_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svmla2_f32j13svfloat32x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") {
@@ -41,28 +33,12 @@ void test_svmla2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __
// CHECK-LABEL: @test_svmla4_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svmla4_f32j13svfloat32x4_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_inout("za") {
@@ -73,16 +49,12 @@ void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __
// Multi, single
// CHECK-LABEL: @test_svmla_single2_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z22test_svmla_single2_f32j13svfloat32x2_tu13__SVFloat32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") {
@@ -91,20 +63,12 @@ void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z
// CHECK-LABEL: @test_svmla_single4_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single4_f32j13svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -115,16 +79,12 @@ void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t z // Multi, indexed // CHECK-LABEL: @test_svmla_lane2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f32j13svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -133,20 +93,12 @@ void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) // CHECK-LABEL: @test_svmla_lane4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f32j13svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -157,20 +109,12 @@ void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) // Multi, multi // CHECK-LABEL: @test_svmla2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla2_f64j13svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming __arm_inout("za") { @@ -179,28 +123,12 @@ void test_svmla2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __ // CHECK-LABEL: @test_svmla4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// 
CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla4_f64j13svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_inout("za") { @@ -211,16 +139,12 @@ void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __ // Multi, single // CHECK-LABEL: @test_svmla_single2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single2_f64j13svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) 
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { @@ -229,20 +153,12 @@ void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CHECK-LABEL: @test_svmla_single4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single4_f64j13svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { @@ -253,16 +169,12 @@ void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t z // Multi, indexed // CHECK-LABEL: @test_svmla_lane2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f64j13svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
@@ -271,20 +183,12 @@ void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm)
// CHECK-LABEL: @test_svmla_lane4_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f64j13svfloat64x4_tu13__SVFloat64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1)
// CPP-CHECK-NEXT: ret void
//
void test_svmla_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") {
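The widening multiply-accumulate tests in acle_sme2_mlal.c get the same treatment for 16-bit element tuples: the vg2x2/vg2x4 forms now consume the tuple parts directly instead of re-extracting them from one concatenated vector. A corresponding source-level sketch (again illustrative, not taken from the patch; it assumes the ACLE spelling svmla_za32_f16_vg2x2 for the fp16-to-fp32 widening form):

#include <arm_sme.h>

// Widening multiply-accumulate: fp16 element pairs from each 2-vector
// tuple accumulate into fp32 ZA slices. Each svfloat16x2_t is passed
// as two <vscale x 8 x half> parts, which lower directly to
// @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16.
void mlal2_f16_example(uint32_t slice_base, svfloat16x2_t zn,
                       svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
  svmla_za32_f16_vg2x2(slice_base, zn, zm);
}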
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c
index c0444c99b63470..9d8b1351debc2a 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c
@@ -19,20 +19,12 @@
// Multi, multi
// CHECK-LABEL: @test_svmla2_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svmla2_f16j13svfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za")
@@ -42,20 +34,12 @@ void test_svmla2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __
// CHECK-LABEL: @test_svmla2_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z16test_svmla2_bf16j14svbfloat16x2_tS_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svmla2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za")
@@ -65,20 +49,12 @@ void test_svmla2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm)
// CHECK-LABEL: @test_svmla2_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svmla2_u16j12svuint16x2_tS_(
// CPP-CHECK-NEXT: entry:
-//
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") @@ -88,20 +64,12 @@ void test_svmla2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __ar // CHECK-LABEL: @test_svmla2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla2_s16j11svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") @@ -111,28 +79,12 @@ void test_svmla2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_ // CHECK-LABEL: @test_svmla4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla4_f16j13svfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") @@ -142,28 +94,12 @@ void test_svmla4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __ // CHECK-LABEL: @test_svmla4_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svmla4_bf16j14svbfloat16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( 
[[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") @@ -173,28 +109,12 @@ void test_svmla4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) // CHECK-LABEL: @test_svmla4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla4_u16j12svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") @@ -204,28 +124,12 @@ void test_svmla4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __ar // CHECK-LABEL: @test_svmla4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmla4_s16j11svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") @@ -297,16 +201,12 @@ void test_svmla_single1_s16(uint32_t slice_base, 
svint16_t zn, svint16_t zm) __a // CHECK-LABEL: @test_svmla_single2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single2_f16j13svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") @@ -316,16 +216,12 @@ void test_svmla_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t z // CHECK-LABEL: @test_svmla_single2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmla_single2_bf16j14svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") @@ -335,16 +231,12 @@ void test_svmla_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_ // CHECK-LABEL: @test_svmla_single2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single2_u16j12svuint16x2_tu12__SVUint16_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -354,16 +246,12 @@ void test_svmla_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CHECK-LABEL: @test_svmla_single2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single2_s16j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -373,20 +261,12 @@ void test_svmla_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _ // CHECK-LABEL: @test_svmla_single4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single4_f16j13svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") @@ -396,20 +276,12 @@ void test_svmla_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t z // CHECK-LABEL: @test_svmla_single4_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmla_single4_bf16j14svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") @@ -419,20 +291,12 @@ void test_svmla_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_ // CHECK-LABEL: @test_svmla_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -442,20 +306,12 @@ void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CHECK-LABEL: @test_svmla_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmla_single4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -529,16 +385,12 @@ void test_svmla_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm // CHECK-LABEL: @test_svmla_lane2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f16j13svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") @@ -548,16 +400,12 @@ void test_svmla_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) // CHECK-LABEL: @test_svmla_lane2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svmla_lane2_bf16j14svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") @@ -567,16 +415,12 @@ void test_svmla_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t // CHECK-LABEL: @test_svmla_lane2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -586,16 +430,12 @@ void test_svmla_lane2_u16(uint32_t 
slice_base, svuint16x2_t zn, svuint16_t zm) _ // CHECK-LABEL: @test_svmla_lane2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane2_s16j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -605,20 +445,12 @@ void test_svmla_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __a // CHECK-LABEL: @test_svmla_lane4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f16j13svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") @@ -628,20 +460,12 @@ void test_svmla_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) // CHECK-LABEL: @test_svmla_lane4_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svmla_lane4_bf16j14svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") @@ -651,20 +475,12 @@ void test_svmla_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t // CHECK-LABEL: @test_svmla_lane4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], 
[[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -674,20 +490,12 @@ void test_svmla_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) _ // CHECK-LABEL: @test_svmla_lane4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmla_lane4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_svmla_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c index 8e4d92fce6c6b7..4efc226c10e685 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlall.c @@ -185,16 +185,12 @@ void test_usmlall_single_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) _ // CHECK-LABEL: @test_svmla_single_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmla_single_x2_s8j10svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -204,16 +200,12 @@ void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ // CHECK-LABEL: @test_svmla_single_x2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmla_single_x2_s16j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -223,16 +215,12 @@ void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) // CHECK-LABEL: @test_svmla_single_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmla_single_x2_u8j11svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -242,16 +230,12 @@ void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) // CHECK-LABEL: @test_svmla_single_x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmla_single_x2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -263,16 +247,12 @@ void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t z // CHECK-LABEL: @test_svmls_single_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmls_single_x2_s8j10svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -282,16 +262,12 @@ void test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ // CHECK-LABEL: @test_svmls_single_x2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmls_single_x2_s16j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 
[[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -301,16 +277,12 @@ void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) // CHECK-LABEL: @test_svmls_single_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmls_single_x2_u8j11svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -320,16 +292,12 @@ void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) // CHECK-LABEL: @test_svmls_single_x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmls_single_x2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -341,16 +309,12 @@ void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t z // CHECK-LABEL: @test_svsumla_single_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsumla_single_x2_s8j10svint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -362,16 +326,12 @@ void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) // CHECK-LABEL: @test_usmlall_single_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_usmlall_single_x2_u8j11svuint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -387,20 +347,12 @@ void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) // CHECK-LABEL: @test_svmla_single_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z23test_svmla_single_x4_s8j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -410,20 +362,12 @@ void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ // CHECK-LABEL: @test_svmla_single_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmla_single_x4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -433,20 +377,12 @@ void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) // CHECK-LABEL: @test_svmla_single_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmla_single_x4_u8j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -456,20 +392,12 @@ void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) // CHECK-LABEL: @test_svmla_single_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmla_single_x4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -481,20 +409,12 @@ void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t z // CHECK-LABEL: @test_svmls_single_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( 
[[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmls_single_x4_s8j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -504,20 +424,12 @@ void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ // CHECK-LABEL: @test_svmls_single_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmls_single_x4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void 
test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -527,20 +439,12 @@ void test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) // CHECK-LABEL: @test_svmls_single_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svmls_single_x4_u8j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -550,20 +454,12 @@ void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) // CHECK-LABEL: @test_svmls_single_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svmls_single_x4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -575,20 +471,12 @@ void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t z // CHECK-LABEL: @test_svsumla_single_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsumla_single_x4_s8j10svint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -600,20 +488,12 @@ void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) // CHECK-LABEL: @test_usmlall_single_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_usmlall_single_x4_u8j11svuint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 
0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -629,20 +509,12 @@ void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) // CHECK-LABEL: @test_mlal_multi_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x2_s8j10svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") @@ -652,20 +524,12 @@ void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __ // CHECK-LABEL: @test_mlal_multi_x2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // 
CPP-CHECK-LABEL: @_Z22test_mlal_multi_x2_s16j11svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") @@ -675,20 +539,12 @@ void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) // CHECK-LABEL: @test_mlal_multi_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x2_u8j11svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") @@ -698,20 +554,12 @@ void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) // CHECK-LABEL: @test_mlal_multi_x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x2_u16j12svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") @@ -723,20 +571,12 @@ void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t z // CHECK-LABEL: @test_mlsl_multi_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x2_s8j10svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") @@ -746,20 +586,12 @@ void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __ // CHECK-LABEL: @test_mlsl_multi_x2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// 
CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x2_s16j11svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za") @@ -769,20 +601,12 @@ void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) // CHECK-LABEL: @test_mlsl_multi_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x2_u8j11svuint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") @@ -792,20 +616,12 @@ void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) // CHECK-LABEL: @test_mlsl_multi_x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x2_u16j12svuint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za") @@ -817,20 +633,12 @@ void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t z // CHECK-LABEL: @test_sumlal_multi_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_sumlal_multi_x2_s8j10svint8x2_t11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP2]], [[TMP3]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za") @@ -842,20 +650,12 @@ void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) // CHECK-LABEL: @test_usmlal_multi_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_usmlal_multi_x2_u8j11svuint8x2_t10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za") @@ -871,28 +671,12 @@ void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) // CHECK-LABEL: @test_mlal_multi_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x4_s8j10svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") @@ -902,28 +686,12 @@ void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __ // CHECK-LABEL: @test_mlal_multi_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x4_s16j11svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 
[[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") @@ -933,28 +701,12 @@ void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) // CHECK-LABEL: @test_mlal_multi_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x4_u8j11svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") @@ -964,28 +716,12 @@ void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) // CHECK-LABEL: @test_mlal_multi_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) 
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x4_u16j12svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") @@ -997,28 +733,12 @@ void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t z // CHECK-LABEL: @test_mlsl_multi_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x4_s8j10svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") @@ -1028,28 +748,12 @@ void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __ // CHECK-LABEL: @test_mlsl_multi_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x4_s16j11svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za") @@ -1059,28 +763,12 @@ void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) // CHECK-LABEL: @test_mlsl_multi_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x4_u8j11svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") @@ -1090,28 +778,12 @@ void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) // CHECK-LABEL: @test_mlsl_multi_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x4_u16j12svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za") @@ -1123,28 +795,12 @@ void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t z // CHECK-LABEL: @test_sumlal_multi_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_sumlal_multi_x4_s8j10svint8x4_t11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_sumlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za") @@ -1156,28 +812,12 @@ void test_sumlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) // CHECK-LABEL: @test_usmlal_multi_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// 
CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_usmlal_multi_x4_u8j11svuint8x4_t10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_usmlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za") @@ -1355,16 +995,12 @@ void test_usmlall_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __a // CHECK-LABEL: @test_smlal_lane_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x2_s8j10svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -1374,16 +1010,12 @@ void test_smlal_lane_x2_s8(uint32_t 
slice_base, svint8x2_t zn, svint8_t zm) __ar
 // CHECK-LABEL: @test_smlal_lane_x2_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x2_s16j11svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret void
 //
 void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
@@ -1393,16 +1025,12 @@ void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _
 // CHECK-LABEL: @test_smlal_lane_x2_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x2_u8j11svuint8x2_tu11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
 // CPP-CHECK-NEXT:    ret void
 //
 void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
@@ -1412,16 +1040,12 @@ void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __
 // CHECK-LABEL: @test_smlal_lane_x2_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL:
@_Z22test_smlal_lane_x2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -1433,16 +1057,12 @@ void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CHECK-LABEL: @test_smlsl_lane_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x2_s8j10svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -1452,16 +1072,12 @@ void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __ar // CHECK-LABEL: @test_smlsl_lane_x2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x2_s16j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void 
test_smlsl_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -1471,16 +1087,12 @@ void test_smlsl_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _ // CHECK-LABEL: @test_smlsl_lane_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x2_u8j11svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -1490,16 +1102,12 @@ void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __ // CHECK-LABEL: @test_smlsl_lane_x2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x2_u16j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -1511,16 +1119,12 @@ void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) // CHECK-LABEL: @test_sumlall_lane_x2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_sumlall_lane_x2_s8j10svint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -1530,16 +1134,12 @@ void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) _ // CHECK-LABEL: @test_usmlall_lane_x2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_usmlall_lane_x2_u8j11svuint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_usmlall_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -1555,20 +1155,12 @@ void test_usmlall_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) _ // CHECK-LABEL: @test_smlal_lane_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x4_s8j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] 
= tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -1578,20 +1170,12 @@ void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ar // CHECK-LABEL: @test_smlal_lane_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -1601,20 +1185,12 @@ void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) _ // CHECK-LABEL: @test_smlal_lane_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], 
[[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x4_u8j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -1624,20 +1200,12 @@ void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __ // CHECK-LABEL: @test_smlal_lane_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -1649,20 +1217,12 @@ void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CHECK-LABEL: @test_smlsl_lane_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( 
[[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x4_s8j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") @@ -1672,20 +1232,12 @@ void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __ar // CHECK-LABEL: @test_smlsl_lane_x4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x4_s16j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") @@ -1695,20 +1247,12 @@ void test_smlsl_lane_x4_s16(uint32_t slice_base, 
svint16x4_t zn, svint16_t zm) _ // CHECK-LABEL: @test_smlsl_lane_x4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x4_u8j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -1718,20 +1262,12 @@ void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __ // CHECK-LABEL: @test_smlsl_lane_x4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x4_u16j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 7) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 7) // CPP-CHECK-NEXT: ret void // void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") @@ -1743,20 +1279,12 @@ void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) // CHECK-LABEL: @test_sumlall_lane_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_sumlall_lane_x4_s8j10svint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") @@ -1768,20 +1296,12 @@ void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) _ // CHECK-LABEL: @test_usmlall_lane_x4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_usmlall_lane_x4_s8j11svuint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 15) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 15) // CPP-CHECK-NEXT: ret void // void test_usmlall_lane_x4_s8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c index 1986317ca719dd..2b1554cd9d8b08 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c @@ -19,20 +19,12 @@ // Multi, multi // CHECK-LABEL: @test_svmls2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmls2_f32j13svfloat32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming __arm_inout("za") { @@ -41,28 +33,12 @@ void test_svmls2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) __ // CHECK-LABEL: @test_svmls4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmls4_f32j13svfloat32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __arm_streaming __arm_inout("za") { @@ -73,16 +49,12 @@ void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __ // Multi, single // CHECK-LABEL: @test_svmls_single2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmls_single2_f32j13svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -91,20 +63,12 @@ void test_svmls_single2_f32(uint32_t 
slice_base, svfloat32x2_t zn, svfloat32_t z // CHECK-LABEL: @test_svmls_single4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmls_single4_f32j13svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -115,16 +79,12 @@ void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t z // Multi, indexed // CHECK-LABEL: @test_svmls_lane2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f32j13svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -133,20 +93,12 @@ void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) // CHECK-LABEL: @test_svmls_lane4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f32j13svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_inout("za") { @@ -157,20 +109,12 @@ void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) // Multi, multi // CHECK-LABEL: @test_svmls2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmls2_f64j13svfloat64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming 
__arm_inout("za") { @@ -179,28 +123,12 @@ void test_svmls2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) __ // CHECK-LABEL: @test_svmls4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svmls4_f64j13svfloat64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __arm_streaming __arm_inout("za") { @@ -211,16 +139,12 @@ void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __ // Multi, single // CHECK-LABEL: @test_svmls_single2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z22test_svmls_single2_f64j13svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { @@ -229,20 +153,12 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z // CHECK-LABEL: @test_svmls_single4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svmls_single4_f64j13svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { @@ -253,16 +169,12 @@ void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t z // Multi, indexed // CHECK-LABEL: @test_svmls_lane2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f64j13svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 1) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { @@ -271,20 +183,12 @@ void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) // CHECK-LABEL: @test_svmls_lane4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f64j13svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // void test_svmls_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c index 121d0fad2ae9bd..e56ffaa1db03e5 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c @@ -19,20 +19,12 @@ // Multi, multi // CHECK-LABEL: @test_svmls2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], 
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls2_f16j13svfloat16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za")
@@ -42,20 +34,12 @@ void test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __
 // CHECK-LABEL: @test_svmls2_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z16test_svmls2_bf16j14svbfloat16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za")
@@ -65,20 +49,12 @@ void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm)
 // CHECK-LABEL: @test_svmls2_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls2_u16j12svuint16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za")
@@ -88,20 +64,12 @@ void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __ar
 // CHECK-LABEL: @test_svmls2_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls2_s16j11svint16x2_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za")
@@ -111,28 +79,12 @@ void test_svmls2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_
 // CHECK-LABEL: @test_svmls4_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls4_f16j13svfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za")
@@ -142,28 +94,12 @@ void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __
 // CHECK-LABEL: @test_svmls4_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z16test_svmls4_bf16j14svbfloat16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za")
@@ -173,28 +109,12 @@ void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm)
 // CHECK-LABEL: @test_svmls4_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls4_u16j12svuint16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za")
@@ -204,28 +124,12 @@ void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __ar
 // CHECK-LABEL: @test_svmls4_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z15test_svmls4_s16j11svint16x4_tS_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za")
@@ -297,16 +201,12 @@ void test_svmls_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __a
 // CHECK-LABEL: @test_svmls_single2_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_f16j13svfloat16x2_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -316,16 +216,12 @@ void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t z
 // CHECK-LABEL: @test_svmls_single2_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z23test_svmls_single2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -335,16 +231,12 @@ void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_
 // CHECK-LABEL: @test_svmls_single2_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_u16j12svuint16x2_tu12__SVUint16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
@@ -354,16 +246,12 @@ void test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm)
 // CHECK-LABEL: @test_svmls_single2_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_s16j11svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
@@ -373,20 +261,12 @@ void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) _
 // CHECK-LABEL: @test_svmls_single4_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_f16j13svfloat16x4_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -396,20 +276,12 @@ void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t z
 // CHECK-LABEL: @test_svmls_single4_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z23test_svmls_single4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -419,20 +291,12 @@ void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_
 // CHECK-LABEL: @test_svmls_single4_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_u16j12svuint16x4_tu12__SVUint16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
@@ -442,20 +306,12 @@ void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm)
 // CHECK-LABEL: @test_svmls_single4_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_s16j11svint16x4_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
@@ -529,16 +385,12 @@ void test_svmls_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm
 // CHECK-LABEL: @test_svmls_lane2_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f16j13svfloat16x2_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -548,16 +400,12 @@ void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm)
 // CHECK-LABEL: @test_svmls_lane2_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z21test_svmls_lane2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -567,16 +415,12 @@ void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t
 // CHECK-LABEL: @test_svmls_lane2_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_u16j12svuint16x2_tu12__SVUint16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
@@ -586,16 +430,12 @@ void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) _
 // CHECK-LABEL: @test_svmls_lane2_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_s16j11svint16x2_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
@@ -605,20 +445,12 @@ void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __a
 // CHECK-LABEL: @test_svmls_lane4_f16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f16j13svfloat16x4_tu13__SVFloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -628,20 +460,12 @@ void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm)
 // CHECK-LABEL: @test_svmls_lane4_bf16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z21test_svmls_lane4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za")
@@ -651,20 +475,12 @@ void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t
 // CHECK-LABEL: @test_svmls_lane4_u16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_u16j12svuint16x4_tu12__SVUint16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
@@ -674,20 +490,12 @@ void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) _
 // CHECK-LABEL: @test_svmls_lane4_s16(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_s16j11svint16x4_tu11__SVInt16_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
 // CPP-CHECK-NEXT: ret void
 //
 void test_svmls_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
index e042161262b33b..da17c6b13d17c8 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c
@@ -9,21 +9,27 @@
 // CHECK-LABEL: @test_svread_ver_za8_u8_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
 return svread_ver_za8_u8_vg2(0, base);
@@ -31,21 +37,27 @@ svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_ver_za8_s8_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
 return svread_ver_za8_s8_vg2(0, base);
@@ -53,21 +65,27 @@ svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
 // CHECK-LABEL: @test_svread_hor_za8_u8_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
 return svread_hor_za8_u8_vg2(0, base);
@@ -75,21 +93,27 @@ svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_hor_za8_s8_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("za") {
 return svread_hor_za8_s8_vg2(0, base);
@@ -97,6 +121,7 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
 // CHECK-LABEL: @test_svread_hor_za8_u8_vg4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
@@ -106,10 +131,13 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
@@ -119,7 +147,9 @@ svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) __arm_streaming __arm_in("z
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") {
 return svread_hor_za8_u8_vg4(0, base);
@@ -127,6 +157,7 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_hor_za8_s8_vg4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
@@ -136,10 +167,13 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
@@ -149,7 +183,9 @@ svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
[[TMP9]] // svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za8_s8_vg4(0, base); @@ -157,6 +193,7 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-LABEL: @test_svread_ver_za8_u8_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -166,10 +203,13 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -179,7 +219,9 @@ svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za8_u8_vg4(0, base); @@ -187,6 +229,7 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_ver_za8_s8_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -196,10 +239,13 @@ svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) __arm_streaming __arm_in(" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( // CPP-CHECK-NEXT: 
@@ -217,21 +265,27 @@ svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) __arm_streaming __arm_in("z
 // CHECK-LABEL: @test_svread_hor_za16_u16_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
+// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_hor_za16_u16_vg2(1, base);
[... identical regenerated hunks (@@ -239,21 +293,27 @@ through @@ -989,21 +1217,27 @@) repeat the same change — an alloca of the tuple struct in the entry block, a store of the concatenated vector, then a load and ret of the struct in place of the old ret of the wide vector — for test_svread_hor_za16_{bf16,f16,s16}_vg2, test_svread_ver_za16_{u16,bf16,f16,s16}_vg2, test_svread_{hor,ver}_za16_{u16,bf16,f16,s16}_vg4 (slice 1, vector.insert.nxv32*.nxv8* at indices 0/8/16/24), test_svread_{hor,ver}_za32_{u32,f32,s32}_vg2 and _vg4 (slice 3, nxv4i32/nxv4f32 elements, vg4 indices 0/4/8/12), and test_svread_hor_za64_{u64,f64,s64}_vg2 (slice 7, nxv2i64/nxv2f64 elements, indices 0/2) ...]
@@ -1011,21 +1245,27 @@ svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_ver_za64_u64_vg2(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 //
CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_u64_vg2(7, base); @@ -1033,21 +1273,27 @@ svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg2(7, base); @@ -1055,21 +1301,27 @@ svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za64_s64_vg2( // CHECK-NEXT: entry: +// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg2(7, base); @@ -1077,6 +1329,7 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_hor_za64_u64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1086,10 +1339,13 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1099,7 +1355,9 @@ svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) __arm_streaming __arm_in // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// 
CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_u64_vg4(7, base); @@ -1107,6 +1365,7 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_hor_za64_f64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1116,10 +1375,13 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1129,7 +1391,9 @@ svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_f64_vg4(7, base); @@ -1137,6 +1401,7 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_hor_za64_s64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1146,10 +1411,13 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1159,7 +1427,9 @@ svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_hor_za64_s64_vg4(7, base); @@ -1167,6 +1437,7 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_ver_za64_u64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1176,10 +1447,13 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1189,7 +1463,9 @@ svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return 
svread_ver_za64_u64_vg4(7, base); @@ -1197,6 +1473,7 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-LABEL: @test_svread_ver_za64_f64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1206,10 +1483,13 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1219,7 +1499,9 @@ svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) __arm_streaming __arm_i // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_f64_vg4(7, base); @@ -1227,6 +1509,7 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_ver_za64_s64_vg4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1236,10 +1519,13 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1249,7 +1535,9 @@ svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in("za") { return svread_ver_za64_s64_vg4(7, base); @@ -1257,21 +1545,27 @@ svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za8_s8_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x2(base); @@ -1279,21 +1573,27 @@ svint8x2_t test_svread_za8_s8_vg1x2(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x2(base); @@ -1301,43 +1601,56 @@ svuint8x2_t test_svread_za8_u8_vg1x2(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svread_za16_s16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x2(base); } +// // CHECK-LABEL: @test_svread_za16_u16_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: 
@_Z26test_svread_za16_u16_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x2(base); @@ -1345,21 +1658,27 @@ svuint16x2_t test_svread_za16_u16_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x2(base); @@ -1367,21 +1686,27 @@ svbfloat16x2_t test_svread_za16_bf16_vg1x2(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // 
CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_f16_vg1x2(base); @@ -1389,21 +1714,27 @@ svfloat16x2_t test_svread_za16_f16_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za32_s32_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_s32_vg1x2(base); @@ -1411,21 +1742,27 @@ svint32x2_t test_svread_za32_s32_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za32_u32_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // 
CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_u32_vg1x2(base); @@ -1433,21 +1770,27 @@ svuint32x2_t test_svread_za32_u32_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za32_f32_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za32_f32_vg1x2(base); @@ -1455,21 +1798,27 @@ svfloat32x2_t test_svread_za32_f32_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_u64_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // 
CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_u64_vg1x2(base); @@ -1477,21 +1826,27 @@ svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za64_f64_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_f64_vg1x2(base); @@ -1499,21 +1854,27 @@ svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) __arm_streaming __arm_in // CHECK-LABEL: @test_svread_za64_s64_vg1x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // 
CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in("za") { return svread_za64_s64_vg1x2(base); @@ -1521,6 +1882,7 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za8_s8_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -1530,10 +1892,13 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_s8_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -1543,7 +1908,9 @@ svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) __arm_streaming __arm_in(" // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_s8_vg1x4(base); @@ -1551,6 +1918,7 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-LABEL: @test_svread_za8_u8_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -1560,10 +1928,13 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svread_za8_u8_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -1573,7 +1944,9 @@ svint8x4_t test_svread_za8_s8_vg1x4(uint32_t base) __arm_streaming __arm_in("za" // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za8_u8_vg1x4(base); @@ -1581,6 +1954,7 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-LABEL: @test_svread_za16_s16_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -1590,10 +1964,13 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_s16_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -1603,7 +1980,9 @@ svuint8x4_t test_svread_za8_u8_vg1x4(uint32_t base) __arm_streaming __arm_in("za // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 
16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_s16_vg1x4(base); @@ -1611,6 +1990,7 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-LABEL: @test_svread_za16_u16_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -1620,10 +2000,13 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_u16_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -1633,7 +2016,9 @@ svint16x4_t test_svread_za16_s16_vg1x4(uint32_t base) __arm_streaming __arm_in(" // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_u16_vg1x4(base); @@ -1641,6 +2026,7 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-LABEL: @test_svread_za16_bf16_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -1650,10 +2036,13 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 
// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z27test_svread_za16_bf16_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -1663,7 +2052,9 @@ svuint16x4_t test_svread_za16_u16_vg1x4(uint32_t base) __arm_streaming __arm_in( // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") { return svread_za16_bf16_vg1x4(base); @@ -1671,6 +2062,7 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-LABEL: @test_svread_za16_f16_vg1x4( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -1680,10 +2072,13 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svread_za16_f16_vg1x4j( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 [[BASE:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -1693,7 +2088,9 @@ svbfloat16x4_t test_svread_za16_bf16_vg1x4(uint32_t base) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 
16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP9]]
 //
 svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za16_f16_vg1x4(base);
@@ -1701,6 +2098,7 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_za32_s32_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
@@ -1710,10 +2108,13 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
+// CHECK-NEXT: store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za32_s32_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
@@ -1723,7 +2124,9 @@ svfloat16x4_t test_svread_za16_f16_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
 //
 svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za32_s32_vg1x4(base);
@@ -1731,6 +2134,7 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("
 // CHECK-LABEL: @test_svread_za32_u32_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
@@ -1740,10 +2144,13 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
+// CHECK-NEXT: store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za32_u32_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
@@ -1753,7 +2160,9 @@ svint32x4_t test_svread_za32_s32_vg1x4(uint32_t base) __arm_streaming __arm_in("
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
 //
 svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za32_u32_vg1x4(base);
@@ -1761,6 +2170,7 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CHECK-LABEL: @test_svread_za32_f32_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
@@ -1770,10 +2180,13 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CHECK-NEXT: ret <vscale x 16 x float> [[TMP8]]
+// CHECK-NEXT: store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za32_f32_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
@@ -1783,7 +2196,9 @@ svuint32x4_t test_svread_za32_u32_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
-// CPP-CHECK-NEXT: ret <vscale x 16 x float> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 16 x float> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP9]]
 //
 svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za32_f32_vg1x4(base);
@@ -1791,6 +2206,7 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_za64_u64_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
@@ -1800,10 +2216,13 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]]
+// CHECK-NEXT: store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
@@ -1813,7 +2232,9 @@ svfloat32x4_t test_svread_za32_f32_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
 //
 svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_u64_vg1x4(base);
@@ -1821,6 +2242,7 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CHECK-LABEL: @test_svread_za64_f64_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
@@ -1830,10 +2252,13 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CHECK-NEXT: ret <vscale x 8 x double> [[TMP8]]
+// CHECK-NEXT: store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
@@ -1843,7 +2268,9 @@ svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) __arm_streaming __arm_in(
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 8 x double> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 8 x double> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]]
 //
 svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_f64_vg1x4(base);
@@ -1851,6 +2278,7 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-LABEL: @test_svread_za64_s64_vg1x4(
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
@@ -1860,10 +2288,13 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]]
+// CHECK-NEXT: store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j(
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[BASE:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
@@ -1873,7 +2304,9 @@ svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) __arm_streaming __arm_in
 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i64> [[TMP8]]
+// CPP-CHECK-NEXT: store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
 //
 svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) __arm_streaming __arm_in("za") {
   return svread_za64_s64_vg1x4(base);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
index 195531cf131e38..26804866a7563c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sqdmulh.c
@@ -19,25 +19,27 @@
 // CHECK-LABEL: @test_svqdmulh_single_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x210svint8x2_tu10__SVInt8_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svqdmulh,_single_s8_x2,,,)(zdn, zm);
@@ -45,25 +47,27 @@ svint8x2_t test_svqdmulh_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streami
 // CHECK-LABEL: @test_svqdmulh_single_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-//
CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x2,,,)(zdn, zm); @@ -71,25 +75,27 @@ svint16x2_t test_svqdmulh_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , 
} @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s32_x2,,,)(zdn, zm); @@ -97,25 +103,27 @@ svint32x2_t test_svqdmulh_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x211svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x2,,,)(zdn, zm); @@ -125,37 +133,35 @@ svint64x2_t test_svqdmulh_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z26test_svqdmulh_single_s8_x410svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: 
ret { , , , } [[TMP9]] // svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s8_x4,,,)(zdn, zm); @@ -163,37 +169,35 @@ svint8x4_t test_svqdmulh_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streami // CHECK-LABEL: @test_svqdmulh_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], 
i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s16_x4,,,)(zdn, zm); @@ -201,37 +205,35 @@ svint16x4_t test_svqdmulh_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], 
[[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) 
__arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s32_x4,,,)(zdn, zm); @@ -239,37 +241,35 @@ svint32x4_t test_svqdmulh_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z27test_svqdmulh_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail 
call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_single_s64_x4,,,)(zdn, zm); @@ -279,29 +279,27 @@ svint64x4_t test_svqdmulh_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_str // CHECK-LABEL: @test_svqdmulh_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: 
@_Z19test_svqdmulh_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x2,,,)(zdn, zm); @@ -309,29 +307,27 @@ svint8x2_t test_svqdmulh_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: 
[[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x2,,,)(zdn, zm); @@ -339,29 +335,27 @@ svint16x2_t test_svqdmulh_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x211svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x2,,,)(zdn, zm); @@ -369,29 +363,27 @@ svint32x2_t test_svqdmulh_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x211svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x2,,,)(zdn, zm); @@ -401,45 +393,35 @@ svint64x2_t test_svqdmulh_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } 
[[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svqdmulh_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , 
}, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s8_x4,,,)(zdn, zm); @@ -447,45 +429,35 @@ svint8x4_t test_svqdmulh_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { // CHECK-LABEL: @test_svqdmulh_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s16_x4,,,)(zdn, zm); @@ -493,45 +465,35 @@ svint16x4_t test_svqdmulh_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s32_x4,,,)(zdn, zm); @@ -539,45 +501,35 @@ svint32x4_t test_svqdmulh_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streamin // CHECK-LABEL: @test_svqdmulh_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z20test_svqdmulh_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], 
[[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svqdmulh_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svqdmulh,_s64_x4,,,)(zdn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c index 6b9d5394ef8e49..aca66e0d78d667 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -24,16 +24,12 @@ // x2 // CHECK-LABEL: @test_svsub_write_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_s32j11svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_inout("za") {
@@ -42,16 +38,12 @@ void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t
// CHECK-LABEL: @test_svsub_write_single2_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u32j12svuint32x2_tu12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") {
@@ -60,16 +52,12 @@ void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32
// CHECK-LABEL: @test_svsub_write_single2_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_s64j11svint64x2_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_inout("za") {
@@ -78,16 +66,12 @@ void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t
// CHECK-LABEL: @test_svsub_write_single2_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
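// A minimal usage sketch of the pattern these checks cover (hedged: the
// intrinsic spelling below is an assumption based on this file's naming, and
// the _COERCE names above are simply Clang's usual struct-coercion value
// names). With svuint64x2_t lowered to a two-field literal struct, each half
// of the tuple reaches the SME intrinsic as its own coerced argument, so no
// llvm.vector.extract at a scaled element offset is emitted:
//
//   void sub_write_pair(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm)
//       __arm_streaming __arm_inout("za") {
//     svsub_write_single_za64_u64_vg1x2(slice_base, zn, zm); // name assumed
//   }
//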
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u64j12svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { @@ -98,20 +82,12 @@ void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64 // CHECK-LABEL: @test_svsub_write_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_s32j11svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_inout("za") { @@ -120,20 +96,12 @@ void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t // CHECK-LABEL: @test_svsub_write_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = 
tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u32j12svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_inout("za") { @@ -142,20 +110,12 @@ void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32 // CHECK-LABEL: @test_svsub_write_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_s64j11svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_inout("za") { @@ -164,20 +124,12 @@ void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t // 
CHECK-LABEL: @test_svsub_write_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u64j12svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_inout("za") { @@ -192,20 +144,12 @@ void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CHECK-LABEL: @test_svsub_write_multi2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s32j11svint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], 
[[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_inout("za") { @@ -214,20 +158,12 @@ void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CHECK-LABEL: @test_svsub_write_multi2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u32j12svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_inout("za") { @@ -236,20 +172,12 @@ void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CHECK-LABEL: @test_svsub_write_multi2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s64j11svint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_inout("za") { @@ -258,20 +186,12 @@ void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CHECK-LABEL: @test_svsub_write_multi2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u64j12svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_inout("za") { @@ -282,28 +202,12 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CHECK-LABEL: @test_svsub_write_multi4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], 
[[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s32j11svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_inout("za") { @@ -312,28 +216,12 @@ void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CHECK-LABEL: @test_svsub_write_multi4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u32j12svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_inout("za") { @@ -342,28 +230,12 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CHECK-LABEL: @test_svsub_write_multi4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s64j11svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_inout("za") {
@@ -372,28 +244,12 @@ void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_
 // CHECK-LABEL: @test_svsub_write_multi4_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u64j12svuint64x4_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZM]], i64 6)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE2:%.*]], <vscale x 2 x i64> [[ZM_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_inout("za") {
@@ -408,16 +264,12 @@ void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x
 // CHECK-LABEL: @test_svsub_za32_vg1x2_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_f32j13svfloat32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -426,16 +278,12 @@ void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_stre
 // CHECK-LABEL: @test_svsub_za32_vg1x2_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_s32j11svint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -444,16 +292,12 @@ void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_stream
 // CHECK-LABEL: @test_svsub_za32_vg1x2_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_u32j12svuint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_inout("za") {
@@ -462,16 +306,12 @@ void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_strea
 // CHECK-LABEL: @test_svsub_za64_vg1x2_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_f64j13svfloat64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_inout("za") {
@@ -480,16 +320,12 @@ void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_stre
 // CHECK-LABEL: @test_svsub_za64_vg1x2_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_s64j11svint64x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_inout("za") {
@@ -498,16 +334,12 @@ void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_stream
 // CHECK-LABEL: @test_svsub_za64_vg1x2_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZN]], i64 2)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_u64j12svuint64x2_t(
 // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_inout("za") { @@ -518,20 +350,12 @@ void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_strea // CHECK-LABEL: @test_svsub_za32_vg1x4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_f32j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_inout("za") { @@ -540,20 +364,12 @@ void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_stre // CHECK-LABEL: @test_svsub_za32_vg1x4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_s32j11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) 
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_inout("za") { @@ -562,20 +378,12 @@ void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_stream // CHECK-LABEL: @test_svsub_za32_vg1x4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_u32j12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_inout("za") { @@ -584,20 +392,12 @@ void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_strea // CHECK-LABEL: @test_svsub_za64_vg1x4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_f64j13svfloat64x4_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_inout("za") { @@ -606,20 +406,12 @@ void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_stre // CHECK-LABEL: @test_svsub_za64_vg1x4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_s64j11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_inout("za") { @@ -628,20 +420,12 @@ void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_stream // CHECK-LABEL: @test_svsub_za64_vg1x4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], 
[[ZN_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_u64j12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c index 0d3e06f3d3d09c..fa66c4ff190142 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx2.c @@ -19,21 +19,27 @@ // CHECK-LABEL: @test_svunpk_s16_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x2u10__SVInt8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv8i16( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x2)(zn); @@ -41,21 +47,27 @@ svint16x2_t test_svunpk_s16_x2(svint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x2u11__SVUint8_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv8i16( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x2)(zn); @@ -63,21 +75,27 @@ svuint16x2_t test_svunpk_u16_x2(svuint8_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x2u11__SVInt16_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv4i32( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x2)(zn); @@ -85,21 +103,27 @@ svint32x2_t test_svunpk_s32_x2(svint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store 
[[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x2u12__SVUint16_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv4i32( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x2)(zn); @@ -107,21 +131,27 @@ svuint32x2_t test_svunpk_u32_x2(svuint16_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x2u11__SVInt32_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sunpk.x2.nxv2i64( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x2)(zn); @@ -129,21 +159,27 @@ svint64x2_t test_svunpk_s64_x2(svint32_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x2( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// 
CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x2u12__SVUint32_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uunpk.x2.nxv2i64( [[ZN:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svunpk_u64_x2(svuint32_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c index e7c57e986bca4c..61718f0984ef31 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_unpkx4.c @@ -19,33 +19,35 @@ // CHECK-LABEL: @test_svunpk_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s16_x410svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s16,_s8_x4)(zn); @@ -53,33 +55,35 @@ svint16x4_t test_svunpk_s16_x4(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( 
[[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u16_x411svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u16,_u8_x4)(zn); @@ -87,33 +91,35 @@ svuint16x4_t test_svunpk_u16_x4(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s32_x411svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s32,_s16_x4)(zn); @@ -121,33 +127,35 @@ svint32x4_t test_svunpk_s32_x4(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u32_x412svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u32,_u16_x4)(zn); @@ -155,33 +163,35 @@ svuint32x4_t test_svunpk_u32_x4(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_s64_x411svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_s64,_s32_x4)(zn); @@ -189,33 +199,35 @@ svint64x4_t test_svunpk_s64_x4(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svunpk_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svunpk_u64_x412svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uunpk.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { 
, , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svunpk_u64_x4(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svunpk_u64,_u32_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c index dd8b9fa0755b4a..d9445ef03b8c1d 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vdot.c @@ -16,16 +16,12 @@ // CHECK-LABEL: @test_svvdot_lane_za32_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svvdot_lane_za32_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_bf16_vg1x2(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -34,16 +30,12 @@ void test_svvdot_lane_za32_bf16_vg1x2(uint32_t slice_base, svbfloat16x2_t zn, sv // CHECK-LABEL: @test_svvdot_lane_za32_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svvdot_lane_za32_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_f16_vg1x2(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { @@ -52,16 +44,12 @@ void test_svvdot_lane_za32_f16_vg1x2(uint32_t slice_base, svfloat16x2_t zn, svfl // CHECK-LABEL: 
@test_svvdot_lane_za32_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svvdot_lane_za32_s16_vg1x2j11svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_s16_vg1x2(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -70,16 +58,12 @@ void test_svvdot_lane_za32_s16_vg1x2(uint32_t slice_base, svint16x2_t zn, svint1 // CHECK-LABEL: @test_svvdot_lane_za32_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svvdot_lane_za32_u16_vg1x2j12svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_u16_vg1x2(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -88,20 +72,12 @@ void test_svvdot_lane_za32_u16_vg1x2(uint32_t slice_base, svuint16x2_t zn, svuin // CHECK-LABEL: @test_svvdot_lane_za32_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], 
i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svvdot_lane_za32_s8_vg1x4j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { @@ -110,20 +86,12 @@ void test_svvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t // CHECK-LABEL: @test_svvdot_lane_za32_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svvdot_lane_za32_u8_vg1x4j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { @@ -132,20 +100,12 @@ void test_svvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8 // CHECK-LABEL: @test_svvdot_lane_za64_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] 
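The x4 hunks here show the same shift for four-vector tuples: an svint8x4_t operand now reaches the SME intrinsic as four separate arguments ([[ZN_COERCE0]] through [[ZN_COERCE3]]) rather than one nxv64i8 concatenation sliced apart with @llvm.vector.extract at element offsets 0/16/32/48. A minimal reproducer for what these checks exercise, assuming <arm_sme.h> and the ACLE spelling svvdot_lane_za32_s8_vg1x4 (the diff elides the test bodies, so the exact call is an assumption):

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical reproducer; lane index 3 matches the trailing 'i32 3'
// immediate in the checks above.
void vdot_s8_x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm)
    __arm_streaming __arm_inout("za") {
  svvdot_lane_za32_s8_vg1x4(slice_base, zn, zm, 3);
}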
= tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svvdot_lane_za64_s16_vg1x4j11svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.svdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret void // void test_svvdot_lane_za64_s16_vg1x4(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { @@ -154,20 +114,12 @@ void test_svvdot_lane_za64_s16_vg1x4(uint32_t slice_base, svint16x4_t zn, svint1 // CHECK-LABEL: @test_svvdot_lane_za64_u16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z31test_svvdot_lane_za64_u16_vg1x4j12svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 1) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.uvdot.lane.za64.vg1x4.nxv8i16(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 1) // CPP-CHECK-NEXT: ret 
void // void test_svvdot_lane_za64_u16_vg1x4(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { @@ -177,20 +129,12 @@ void test_svvdot_lane_za64_u16_vg1x4(uint32_t slice_base, svuint16x4_t zn, svuin // CHECK-LABEL: @test_svsuvdot_lane_za32_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svsuvdot_lane_za32_s8_vg1x4j10svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svsuvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { @@ -200,20 +144,12 @@ void test_svsuvdot_lane_za32_s8_vg1x4(uint32_t slice_base, svint8x4_t zn, svint8 // CHECK-LABEL: @test_svusvdot_lane_za32_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z32test_svusvdot_lane_za32_u8_vg1x4j11svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]], i32 3) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 [[SLICE_BASE:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]], i32 3) // CPP-CHECK-NEXT: ret void // void test_svusvdot_lane_za32_u8_vg1x4(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c index 425fa55503174c..c118a7192c6ca8 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c @@ -25,25 +25,27 @@ // CHECK-LABEL: @test_svadd_vector_single2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, 
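Two details worth noting in the hunks above: the za64 variants carry a lane immediate of i32 1 rather than i32 3, consistent with the narrower lane range of the 64-bit-accumulator forms, and the mixed-sign suvdot/usvdot tests follow exactly the same coercion pattern as the plain svdot ones. A hedged sketch of the za64 case (intrinsic spelling assumed from the ACLE naming scheme; it needs the i16-to-i64 SME feature this test file presumably enables):

#include <arm_sme.h>
#include <stdint.h>

// Hypothetical reproducer for the za64 checks; the lane immediate must stay
// within the smaller range, hence 1 here.
void vdot_za64_x4(uint32_t slice_base, svint16x4_t zn, svint16_t zm)
    __arm_streaming __arm_inout("za") {
  svvdot_lane_za64_s16_vg1x4(slice_base, zn, zm, 1);
}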
[[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); @@ -51,25 +53,27 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); @@ -77,25 +81,27 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); @@ -103,25 +109,27 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_st // CHECK-LABEL: 
@test_svadd_vector_single2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); @@ -129,25 +137,27 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); @@ -155,25 +165,27 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); @@ -181,25 +193,27 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); @@ -207,25 +221,27 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: 
store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); @@ -236,37 +252,35 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } 
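On the return path the x2 add tests show the inverse of the argument change: the intrinsic's two-element result is still widened with @llvm.vector.insert (offsets 0 and 2 for the nxv2i64 case), but it is then stored through a RETVAL alloca and reloaded, so the function returns a literal struct of two scalable vectors. A minimal sketch, assuming the non-overloaded name that the test's SVE_ACLE_FUNC macro expands to:

#include <arm_sme.h>

// Hypothetical reproducer: the tuple result now comes back as a literal
// struct of two <vscale x 2 x i64> values, as the RETVAL checks above expect.
svuint64x2_t add_pair_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming {
  return svadd_single_u64_x2(zn, zm);
}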
[[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); @@ -274,37 +288,35 @@ svint8x4_t 
test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_stream // CHECK-LABEL: @test_svadd_vector_single4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , 
} [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); @@ -312,37 +324,35 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_str // CHECK-LABEL: @test_svadd_vector_single4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); @@ -350,37 +360,35 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); @@ -388,37 +396,35 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); @@ -426,37 +432,35 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// 
CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); @@ -464,37 +468,35 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm // CHECK-LABEL: @test_svadd_vector_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); @@ -502,37 +504,35 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_st // CHECK-LABEL: @test_svadd_vector_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// 
CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[ZN_COERCE0:%.*]], 
[[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c index 9a6fc712143541..3e47a3ecc17c0b 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_qrshr.c @@ -20,17 +20,13 @@ // CHECK-LABEL: @test_svsqrshr_u16_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u32_x412svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsqrshr_u16_u32_x4(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_u16,_u32_x2,)(zn, 16); @@ -38,17 +34,13 @@ svuint16_t test_svsqrshr_u16_u32_x4(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshr_s16_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshr_s16_s32_x411svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svsqrshr_s16_s32_x4(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_s16,_s32_x2,)(zn, 16); @@ -56,21 +48,13 @@ svint16_t test_svsqrshr_s16_s32_x4(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshr_u8_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svsqrshr_u8_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svsqrshr_u8_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_u8,_u32_x4,)(zn, 8); @@ -78,21 +62,13 @@ svuint8_t test_svsqrshr_u8_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshr_s8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z23test_svsqrshr_s8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_svsqrshr_s8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_s8,_s32_x4,)(zn, 8); @@ -100,21 +76,13 @@ svint8_t test_svsqrshr_s8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshr_u16_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshr_u16_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshr.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsqrshr_u16_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_u16,_u64_x4,)(zn, 16); @@ -122,21 +90,13 @@ svuint16_t test_svsqrshr_u16_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshr_s16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // 
CPP-CHECK-LABEL: @_Z24test_svsqrshr_s16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshr.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svsqrshr_s16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshr,_n,_s16,_s64_x4,)(zn, 16); @@ -146,21 +106,13 @@ svint16_t test_svsqrshr_s16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrn_u8_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshrn_u8_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svsqrshrn_u8_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrn,_n,_u8,_u32_x4,)(zn, 8); @@ -168,21 +120,13 @@ svuint8_t test_svsqrshrn_u8_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrn_s8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CHECK-NEXT: ret [[TMP4]] 
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshrn_s8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_svsqrshrn_s8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrn,_n,_s8,_s32_x4,)(zn, 8); @@ -190,21 +134,13 @@ svint8_t test_svsqrshrn_s8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrn_u16_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svsqrshrn_u16_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsqrshrn_u16_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrn,_n,_u16,_u64_x4,)(zn, 16); @@ -212,21 +148,13 @@ svuint16_t test_svsqrshrn_u16_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrn_s16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svsqrshrn_s16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svsqrshrn_s16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrn,_n,_s16,_s64_x4,)(zn, 16); @@ -236,17 +164,13 @@ svint16_t test_svsqrshrn_s16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsvqrshru_u16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svsvqrshru_u16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsvqrshru_u16_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshru,_n,_u16,_s32_x2,)(zn, 16); @@ -254,21 +178,13 @@ svuint16_t test_svsvqrshru_u16_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshru_u8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv4i32( 
[[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svsqrshru_u8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 8) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svsqrshru_u8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshru,_n,_u8,_s32_x4,)(zn, 8); @@ -276,21 +192,13 @@ svuint8_t test_svsqrshru_u8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshru_u16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svsqrshru_u16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshru.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsqrshru_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshru,_n,_u16,_s64_x4,)(zn, 16); @@ -300,21 +208,13 @@ svuint16_t test_svsqrshru_u16_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrun_u8_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] 
= tail call @llvm.aarch64.sve.sqrshrun.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 32) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 32) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svsqrshrun_u8_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 32) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 32) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svsqrshrun_u8_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrun,_n,_u8,_s32_x4,)(zn, 32); @@ -322,21 +222,13 @@ svuint8_t test_svsqrshrun_u8_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svsqrshrun_u16_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 64) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 64) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z26test_svsqrshrun_u16_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], i32 64) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]], i32 64) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svsqrshrun_u16_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svqrshrun,_n,_u16,_s64_x4,)(zn, 64); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c index f69a94c15b8580..87160444e3c0d1 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_rshl.c @@ 
-19,25 +19,27 @@ // CHECK-LABEL: @test_svrshl_single_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x210svint8x2_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x2,,,)(zdn, zm); @@ -45,25 +47,27 @@ svint8x2_t test_svrshl_single_s8_x2(svint8x2_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x211svint16x2_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x2,,,)(zdn, zm); @@ -71,25 +75,27 @@ svint16x2_t test_svrshl_single_s16_x2(svint16x2_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// 
CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x211svint32x2_tu11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
 svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s32_x2,,,)(zdn, zm);
@@ -97,25 +103,27 @@ svint32x2_t test_svrshl_single_s32_x2(svint32x2_t zdn, svint32_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_s64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
-// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x211svint64x2_tu11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
 //
 svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_s64_x2,,,)(zdn, zm);
@@ -123,25 +131,27 @@ svint64x2_t test_svrshl_single_s64_x2(svint64x2_t zdn, svint64_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_single_u8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x211svuint8x2_tu11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_single_u8_x2,,,)(zdn, zm);
@@ -149,25 +159,27 @@ svuint8x2_t test_svrshl_single_u8_x2(svuint8x2_t zdn, svuint8_t zm) __arm_stream
 // CHECK-LABEL: @test_svrshl_single_u16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x212svuint16x2_tu12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x2,,,)(zdn, zm); @@ -175,25 +187,27 @@ svuint16x2_t test_svrshl_single_u16_x2(svuint16x2_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x2,,,)(zdn, zm); @@ -201,25 +215,27 @@ svuint32x2_t test_svrshl_single_u32_x2(svuint32x2_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x212svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
[[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x2,,,)(zdn, zm); @@ -229,37 +245,35 @@ svuint64x2_t test_svrshl_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_s8_x410svint8x4_tu10__SVInt8_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s8_x4,,,)(zdn, zm); @@ -267,37 +281,35 @@ svint8x4_t test_svrshl_single_s8_x4(svint8x4_t zdn, svint8_t zm) __arm_streaming // CHECK-LABEL: @test_svrshl_single_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s16_x411svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s16_x4,,,)(zdn, zm); @@ -305,37 +317,35 @@ svint16x4_t test_svrshl_single_s16_x4(svint16x4_t zdn, svint16_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s32_x411svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s32_x4,,,)(zdn, zm); @@ -343,37 +353,35 @@ svint32x4_t test_svrshl_single_s32_x4(svint32x4_t zdn, svint32_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// 
CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_s64_x411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , 
} [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_s64_x4,,,)(zdn, zm); @@ -381,37 +389,35 @@ svint64x4_t test_svrshl_single_s64_x4(svint64x4_t zdn, svint64_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_single_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_single_u8_x411svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], 
i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u8_x4,,,)(zdn, zm); @@ -419,37 +425,35 @@ svuint8x4_t test_svrshl_single_u8_x4(svuint8x4_t zdn, svuint8_t zm) __arm_stream // CHECK-LABEL: @test_svrshl_single_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: 
[[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u16_x412svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u16_x4,,,)(zdn, zm); @@ -457,37 +461,35 @@ svuint16x4_t test_svrshl_single_u16_x4(svuint16x4_t zdn, svuint16_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u32_x412svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] 
= tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u32_x4,,,)(zdn, zm); @@ -495,37 +497,35 @@ svuint32x4_t test_svrshl_single_u32_x4(svuint32x4_t zdn, svuint32_t zm) __arm_st // CHECK-LABEL: @test_svrshl_single_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], 
[[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z25test_svrshl_single_u64_x412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: 
store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_single_u64_x4,,,)(zdn, zm); @@ -535,29 +535,27 @@ svuint64x4_t test_svrshl_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_st // CHECK-LABEL: @test_svrshl_multi_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x210svint8x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x2,,,)(zdn, zm); @@ -565,29 +563,27 @@ svint8x2_t test_svrshl_multi_s8_x2(svint8x2_t zdn, svint8x2_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x211svint16x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s16_x2,,,)(zdn, zm);
@@ -595,29 +591,27 @@ svint16x2_t test_svrshl_multi_s16_x2(svint16x2_t zdn, svint16x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s32_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP8]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x211svint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZDN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZM]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP8]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> [[ZDN_COERCE0:%.*]], <vscale x 4 x i32> [[ZDN_COERCE1:%.*]], <vscale x 4 x i32> [[ZM_COERCE0:%.*]], <vscale x 4 x i32> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
 svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s32_x2,,,)(zdn, zm);
@@ -625,29 +619,27 @@ svint32x2_t test_svrshl_multi_s32_x2(svint32x2_t zdn, svint32x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_s64_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP8]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x211svint64x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZDN]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[ZM]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP8]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> }
@llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> [[ZDN_COERCE0:%.*]], <vscale x 2 x i64> [[ZDN_COERCE1:%.*]], <vscale x 2 x i64> [[ZM_COERCE0:%.*]], <vscale x 2 x i64> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    store <vscale x 4 x i64> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
 //
 svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_s64_x2,,,)(zdn, zm);
@@ -655,29 +647,27 @@ svint64x2_t test_svrshl_multi_s64_x2(svint64x2_t zdn, svint64x2_t zm) __arm_stre
 // CHECK-LABEL: @test_svrshl_multi_u8_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP8]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x211svuint8x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZDN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP8]]
+//
CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> [[ZDN_COERCE0:%.*]], <vscale x 16 x i8> [[ZDN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_streaming {
   return SVE_ACLE_FUNC(svrshl,_u8_x2,,,)(zdn, zm);
@@ -685,29 +675,27 @@ svuint8x2_t test_svrshl_multi_u8_x2(svuint8x2_t zdn, svuint8x2_t zm) __arm_strea
 // CHECK-LABEL: @test_svrshl_multi_u16_x2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP8]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16> [[ZDN_COERCE0:%.*]], <vscale x 8 x i16> [[ZDN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x212svuint16x2_tS_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZDN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail
call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x2,,,)(zdn, zm); @@ -715,29 +703,27 @@ svuint16x2_t test_svrshl_multi_u16_x2(svuint16x2_t zdn, svuint16x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x212svuint32x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x2,,,)(zdn, zm); @@ -745,29 +731,27 @@ svuint32x2_t test_svrshl_multi_u32_x2(svuint32x2_t zdn, svuint32x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x212svuint64x2_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u64_x2,,,)(zdn, zm); @@ -777,45 +761,35 @@ svuint64x2_t test_svrshl_multi_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_s8_x410svint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], 
ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s8_x4,,,)(zdn, zm); @@ -823,45 +797,35 @@ svint8x4_t test_svrshl_multi_s8_x4(svint8x4_t zdn, svint8x4_t zm) __arm_streamin // CHECK-LABEL: @test_svrshl_multi_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s16_x411svint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s16_x4,,,)(zdn, zm); @@ -869,45 +833,35 @@ svint16x4_t test_svrshl_multi_s16_x4(svint16x4_t zdn, svint16x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s32_x411svint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// 
CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s32_x4,,,)(zdn, zm); @@ -915,45 +869,35 @@ svint32x4_t test_svrshl_multi_s32_x4(svint32x4_t zdn, svint32x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } 
[[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_s64_x411svint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], 
[[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_s64_x4,,,)(zdn, zm); @@ -961,45 +905,35 @@ svint64x4_t test_svrshl_multi_s64_x4(svint64x4_t zdn, svint64x4_t zm) __arm_stre // CHECK-LABEL: @test_svrshl_multi_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z23test_svrshl_multi_u8_x411svuint8x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZDN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZM]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , 
}, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u8_x4,,,)(zdn, zm); @@ -1007,45 +941,35 @@ svuint8x4_t test_svrshl_multi_u8_x4(svuint8x4_t zdn, svuint8x4_t zm) __arm_strea // CHECK-LABEL: @test_svrshl_multi_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u16_x412svuint16x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZDN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u16_x4,,,)(zdn, zm); @@ -1053,45 +977,35 @@ svuint16x4_t test_svrshl_multi_u16_x4(svuint16x4_t zdn, svuint16x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: 
[[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u32_x412svuint32x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZDN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_streaming { return SVE_ACLE_FUNC(svrshl,_u32_x4,,,)(zdn, zm); @@ -1099,45 +1013,35 @@ svuint32x4_t test_svrshl_multi_u32_x4(svuint32x4_t zdn, svuint32x4_t zm) __arm_s // CHECK-LABEL: @test_svrshl_multi_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] 
= tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], [[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z24test_svrshl_multi_u64_x412svuint64x4_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZDN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( [[ZDN_COERCE0:%.*]], [[ZDN_COERCE1:%.*]], [[ZDN_COERCE2:%.*]], [[ZDN_COERCE3:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]], 
[[ZM_COERCE2:%.*]], [[ZM_COERCE3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6)
+// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , , , } [[TMP9]]
//
svuint64x4_t test_svrshl_multi_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svrshl,_u64_x4,,,)(zdn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c
index 47d449515ce669..a95f89faf77834 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx2.c
@@ -19,29 +19,27 @@
// CHECK-LABEL: @test_svsel_s8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z16test_svsel_s8_x2u11__SVCount_t10svint8x2_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZM]], i64 16)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP8]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svsel,_s8_x2)(pn, zn, zm);
@@ -49,29 +47,27 @@ svint8x2_t test_svsel_s8_x2(svcount_t pn, svint8x2_t zn, svint8x2_t zm) __arm_st
// CHECK-LABEL: @test_svsel_u8_x2(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16)
-// CHECK-NEXT: ret [[TMP8]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16)
+// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { , } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z16test_svsel_u8_x2u11__SVCount_t11svuint8x2_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZM]], i64 16) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x2)(pn, zn, zm); @@ -81,29 +77,27 @@ svuint8x2_t test_svsel_u8_x2(svcount_t pn, svuint8x2_t zn, svuint8x2_t zm) __arm // CHECK-LABEL: @test_svsel_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// 
CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x2u11__SVCount_t11svint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x2)(pn, zn, zm); @@ -111,29 +105,27 @@ svint16x2_t test_svsel_s16_x2(svcount_t pn, svint16x2_t zn, svint16x2_t zm) __ar // CHECK-LABEL: @test_svsel_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, 
[[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x2u11__SVCount_t12svuint16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x2)(pn, zn, zm); @@ -141,29 +133,27 @@ svuint16x2_t test_svsel_u16_x2(svcount_t pn, svuint16x2_t zn, svuint16x2_t zm) _ // CHECK-LABEL: @test_svsel_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x2u11__SVCount_t13svfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x2)(pn, zn, zm); @@ -171,29 +161,27 @@ svfloat16x2_t test_svsel_f16_x2(svcount_t pn, svfloat16x2_t zn, svfloat16x2_t zm // CHECK-LABEL: @test_svsel_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x2u11__SVCount_t14svbfloat16x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x2)(pn, zn, zm); @@ -203,29 +191,27 @@ svbfloat16x2_t test_svsel_bf16_x2(svcount_t pn, svbfloat16x2_t zn, svbfloat16x2_ // CHECK-LABEL: @test_svsel_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } 
@llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x2u11__SVCount_t11svint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x2)(pn, zn, zm); @@ -233,29 +219,27 @@ svint32x2_t test_svsel_s32_x2(svcount_t pn, svint32x2_t zn, svint32x2_t zm) __ar // CHECK-LABEL: @test_svsel_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x2u11__SVCount_t12svuint32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x2)(pn, zn, zm); @@ -263,29 +247,27 @@ svuint32x2_t test_svsel_u32_x2(svcount_t pn, svuint32x2_t zn, svuint32x2_t zm) _ // CHECK-LABEL: 
@test_svsel_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x2u11__SVCount_t13svfloat32x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t 
test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x2)(pn, zn, zm); @@ -295,29 +277,27 @@ svfloat32x2_t test_svsel_f32_x2(svcount_t pn, svfloat32x2_t zn, svfloat32x2_t zm // CHECK-LABEL: @test_svsel_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x2u11__SVCount_t11svint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x2)(pn, zn, zm); @@ -325,29 +305,27 @@ svint64x2_t test_svsel_s64_x2(svcount_t pn, svint64x2_t zn, svint64x2_t zm) __ar // CHECK-LABEL: @test_svsel_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x2u11__SVCount_t12svuint64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x2)(pn, zn, zm); @@ -355,29 +333,27 @@ svuint64x2_t test_svsel_u64_x2(svcount_t pn, svuint64x2_t zn, svuint64x2_t zm) _ // CHECK-LABEL: @test_svsel_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x2u11__SVCount_t13svfloat64x2_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.sel.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZM_COERCE0:%.*]], [[ZM_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2)
+// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { , } [[TMP5]]
//
svfloat64x2_t test_svsel_f64_x2(svcount_t pn, svfloat64x2_t zn, svfloat64x2_t zm) __arm_streaming {
  return SVE_ACLE_FUNC(svsel,_f64_x2)(pn, zn, zm);
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
index 88de2ec2f8ace0..997b6acf962443 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_selx4.c
@@ -19,45 +19,35 @@
// CHECK-LABEL: @test_svsel_s8_x4(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 16)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 48)
-// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0
-// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1
-// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16)
-// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2
-// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32)
-// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3
-// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48)
-// CHECK-NEXT: ret [[TMP16]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } 
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svsel_s8_x4u11__SVCount_t10svint8x4_tS0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 48)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 16)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 48)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 0
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP9]], i64 0)
-// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 1
-// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]], i64 16)
-// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 2
-// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], i64 32)
-// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 3
-// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]], i64 48)
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP16]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[ZN1_COERCE0:%.*]], <vscale x 16 x i8> [[ZN1_COERCE1:%.*]], <vscale x 16 x i8> [[ZN1_COERCE2:%.*]], <vscale x 16 x i8> [[ZN1_COERCE3:%.*]], <vscale x 16 x i8> [[ZN2_COERCE0:%.*]], <vscale x 16 x i8> [[ZN2_COERCE1:%.*]], <vscale x 16 x i8> [[ZN2_COERCE2:%.*]], <vscale x 16 x i8> [[ZN2_COERCE3:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_streaming {
   return SVE_ACLE_FUNC(svsel,_s8_x4)(pn, zn1, zn2);
@@ -65,45 +55,35 @@ svint8x4_t test_svsel_s8_x4(svcount_t pn, svint8x4_t zn1, svint8x4_t zn2) __arm_
 // CHECK-LABEL: @test_svsel_u8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN1]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2:%.*]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 16)
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[ZN2]], i64 48)
-// CHECK-NEXT: [[TMP8:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]])
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 0
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP9]], i64 0)
-// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 1
-// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP10]], <vscale x 16 x i8> [[TMP11]], i64 16)
-// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 2
-// CHECK-NEXT: [[TMP14:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP12]], <vscale x 16 x i8> [[TMP13]], i64 32)
-// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], 3
-// CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP14]], <vscale x 16 x i8> [[TMP15]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP16]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], <vscale x 16 x i8> [[ZN1_COERCE0:%.*]], <vscale x 16 x i8> [[ZN1_COERCE1:%.*]], <vscale x 16 x i8> [[ZN1_COERCE2:%.*]], <vscale x 16 x i8> [[ZN1_COERCE3:%.*]], <vscale x 16 x i8> [[ZN2_COERCE0:%.*]], <vscale x 16 x i8> [[ZN2_COERCE1:%.*]], <vscale x 16 x i8> [[ZN2_COERCE2:%.*]], <vscale x 16 x i8> [[ZN2_COERCE3:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT: store <vscale x 64 x i8> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP9:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svsel_u8_x4u11__SVCount_t11svuint8x4_tS0_S0_S0_(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT:
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN1]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN2]], i64 48) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 16) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP12]], [[TMP13]], i64 32) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP14]], [[TMP15]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svuint8x4_t zn3, svuint8x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u8_x4)(pn, zn1, zn2); @@ -113,45 +93,35 @@ svuint8x4_t test_svsel_u8_x4(svcount_t pn, svuint8x4_t zn1, svuint8x4_t zn2, svu // CHECK-LABEL: @test_svsel_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 8) -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s16_x4u11__SVCount_t11svint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 16) -// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, svint16x4_t zn3, svint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s16_x4)(pn, zn1, zn2); @@ -159,45 +129,35 @@ svint16x4_t test_svsel_s16_x4(svcount_t pn, svint16x4_t zn1, svint16x4_t zn2, sv // CHECK-LABEL: @test_svsel_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u16_x4u11__SVCount_t12svuint16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, svuint16x4_t zn3, svuint16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u16_x4)(pn, zn1, zn2); @@ -205,45 +165,35 @@ svuint16x4_t test_svsel_u16_x4(svcount_t pn, svuint16x4_t zn1, svuint16x4_t zn2, // CHECK-LABEL: @test_svsel_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// 
CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f16_x4u11__SVCount_t13svfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], 
[[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t zn2, svfloat16x4_t zn3, svfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f16_x4)(pn, zn1, zn2); @@ -251,45 +201,35 @@ svfloat16x4_t test_svsel_f16_x4(svcount_t pn, svfloat16x4_t zn1, svfloat16x4_t z // CHECK-LABEL: @test_svsel_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 24) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svsel_bf16_x4u11__SVCount_t14svbfloat16x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN1]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN2]], i64 24) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 8) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP12]], [[TMP13]], i64 16) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP14]], [[TMP15]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv8bf16(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4_t zn2, svbfloat16x4_t zn3, svbfloat16x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_bf16_x4)(pn, zn1, zn2); @@ -299,45 +239,35 @@ svbfloat16x4_t test_svsel_bf16_x4(svcount_t pn, svbfloat16x4_t zn1, svbfloat16x4 // CHECK-LABEL: @test_svsel_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// 
CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s32_x4u11__SVCount_t11svint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, svint32x4_t zn3, svint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s32_x4)(pn, zn1, zn2); @@ -345,45 +275,35 @@ svint32x4_t 
test_svsel_s32_x4(svcount_t pn, svint32x4_t zn1, svint32x4_t zn2, sv // CHECK-LABEL: @test_svsel_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u32_x4u11__SVCount_t12svuint32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN1]], i64 
12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN2]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, svuint32x4_t zn3, svuint32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u32_x4)(pn, zn1, zn2); @@ -391,45 +311,35 @@ svuint32x4_t test_svsel_u32_x4(svcount_t pn, svuint32x4_t zn1, svuint32x4_t zn2, // CHECK-LABEL: @test_svsel_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 12) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f32_x4u11__SVCount_t13svfloat32x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN1]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN2]], i64 12) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = 
extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 4) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP12]], [[TMP13]], i64 8) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP14]], [[TMP15]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t zn2, svfloat32x4_t zn3, svfloat32x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f32_x4)(pn, zn1, zn2); @@ -439,45 +349,35 @@ svfloat32x4_t test_svsel_f32_x4(svcount_t pn, svfloat32x4_t zn1, svfloat32x4_t z // CHECK-LABEL: @test_svsel_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// 
CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_s64_x4u11__SVCount_t11svint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret 
[[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, svint64x4_t zn3, svint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_s64_x4)(pn, zn1, zn2); @@ -485,45 +385,35 @@ svint64x4_t test_svsel_s64_x4(svcount_t pn, svint64x4_t zn1, svint64x4_t zn2, sv // CHECK-LABEL: @test_svsel_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], 
[[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_u64_x4u11__SVCount_t12svuint64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN2]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, svuint64x4_t zn3, svuint64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_u64_x4)(pn, zn1, zn2); @@ -531,45 +421,35 @@ svuint64x4_t test_svsel_u64_x4(svcount_t pn, svuint64x4_t zn1, svuint64x4_t zn2, // CHECK-LABEL: @test_svsel_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2:%.*]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 6) -// CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CHECK-NEXT: ret [[TMP16]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svsel_f64_x4u11__SVCount_t13svfloat64x4_tS0_S0_S0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN1]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN2]], i64 6) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP8]], 0 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP8]], 1 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 2) -// CPP-CHECK-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP8]], 2 -// CPP-CHECK-NEXT: [[TMP14:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP12]], [[TMP13]], i64 4) -// CPP-CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[TMP8]], 3 -// CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP14]], [[TMP15]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP16]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.sel.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], [[ZN1_COERCE0:%.*]], [[ZN1_COERCE1:%.*]], [[ZN1_COERCE2:%.*]], [[ZN1_COERCE3:%.*]], [[ZN2_COERCE0:%.*]], [[ZN2_COERCE1:%.*]], [[ZN2_COERCE2:%.*]], [[ZN2_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svsel_f64_x4(svcount_t pn, svfloat64x4_t zn1, svfloat64x4_t zn2, svfloat64x4_t zn3, svfloat64x4_t zn4) __arm_streaming { return SVE_ACLE_FUNC(svsel,_f64_x4)(pn, zn1, 
zn2); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c index 4b523fb544d252..de605bab67cc35 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx2.c @@ -20,25 +20,27 @@ // CHECK-LABEL: @test_svuzp_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x2)(zn); @@ -46,25 +48,27 @@ svint8x2_t test_svuzp_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( 
<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x211svuint8x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uzp.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x2)(zn);
@@ -74,25 +78,27 @@ svuint8x2_t test_svuzp_u8_x2(svuint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svuzp_s16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x211svint16x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x2)(zn);
@@ -100,25 +106,27 @@ svint16x2_t test_svuzp_s16_x2(svint16x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svuzp_u16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzp.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> }
[[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x2)(zn); @@ -126,25 +134,27 @@ svuint16x2_t test_svuzp_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x2)(zn); @@ -152,25 +162,27 @@ svfloat16x2_t test_svuzp_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_bf16_x2)(zn); @@ -180,25 +192,27 @@ svbfloat16x2_t test_svuzp_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( 
poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s32_x2)(zn); @@ -206,25 +220,27 @@ svint32x2_t test_svuzp_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x2)(zn); @@ -232,25 +248,27 
@@ svuint32x2_t test_svuzp_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x2)(zn); @@ -260,25 +278,27 @@ svfloat32x2_t test_svuzp_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x2)(zn); @@ -286,25 +306,27 @@ svint64x2_t test_svuzp_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x2)(zn); @@ -312,25 +334,27 @@ svuint64x2_t test_svuzp_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: 
@_Z17test_svuzp_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzp.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x2)(zn); @@ -340,25 +364,27 @@ svfloat64x2_t test_svuzp_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x2)(zn); @@ -366,25 +392,27 @@ svint8x2_t test_svuzpq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x2)(zn);
@@ -392,25 +420,27 @@ svuint8x2_t test_svuzpq_u8_x2(svuint8x2_t zn) __arm_streaming {
// CHECK-LABEL: @test_svuzpq_s16_x2(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzpq.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzpq.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x211svint16x2_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzpq.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uzpq.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> }
[[TMP5]] // svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x2)(zn); @@ -418,25 +448,27 @@ svint16x2_t test_svuzpq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x2)(zn); @@ -444,25 +476,27 @@ svuint16x2_t test_svuzpq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x2)(zn); @@ -470,25 +504,27 @@ svfloat16x2_t test_svuzpq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x2)(zn); @@ -496,25 +532,27 @@ svbfloat16x2_t test_svuzpq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x2)(zn); @@ -522,25 +560,27 @@ svint32x2_t test_svuzpq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x2)(zn); @@ -548,25 +588,27 @@ svuint32x2_t test_svuzpq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( 
[[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x2)(zn); @@ -574,25 +616,27 @@ svfloat32x2_t test_svuzpq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x2)(zn); @@ -600,25 +644,27 @@ svint64x2_t test_svuzpq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x2)(zn); @@ -626,25 +672,27 @@ svuint64x2_t test_svuzpq_u64_x2(svuint64x2_t zn) __arm_streaming { // 
CHECK-LABEL: @test_svuzpq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.uzpq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svuzpq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c index 8f8b7fbb5bbf46..aa210f59508b59 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_uzpx4.c @@ -20,37 +20,35 @@ // CHECK-LABEL: @test_svuzp_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s8_x4)(zn); @@ -58,37 +56,35 @@ svint8x4_t test_svuzp_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// 
CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svuzp_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u8_x4)(zn); @@ -98,37 +94,35 @@ svuint8x4_t test_svuzp_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s16_x4)(zn); @@ -136,37 +130,35 @@ svint16x4_t test_svuzp_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], 
i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u16_x4)(zn); @@ -174,37 +166,35 @@ svuint16x4_t test_svuzp_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f16_x4)(zn); @@ -212,37 +202,35 @@ svfloat16x4_t test_svuzp_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzp_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) 
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.uzp.x4.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    store <vscale x 32 x bfloat> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]]
//
svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svuzp,_bf16_x4)(zn);
@@ -252,37 +240,35 @@ svbfloat16x4_t test_svuzp_bf16_x4(svbfloat16x4_t zn) __arm_streaming {

// CHECK-LABEL: @test_svuzp_s32_x4(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z17test_svuzp_s32_x411svint32x4_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
//
svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming {
  return SVE_ACLE_FUNC(svuzp,_s32_x4)(zn);
@@ -290,37 +276,35 @@ svint32x4_t test_svuzp_s32_x4(svint32x4_t zn) __arm_streaming {

// CHECK-LABEL: @test_svuzp_u32_x4(
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[ZN_COERCE0:%.*]], <vscale x 4 x i32> [[ZN_COERCE1:%.*]], <vscale x 4 x i32> [[ZN_COERCE2:%.*]], <vscale x 4 x i32> [[ZN_COERCE3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    store <vscale x 16 x i32> [[TMP8]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]]
//
// CPP-CHECK-LABEL: @_Z17test_svuzp_u32_x412svuint32x4_t(
// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uzp.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u32_x4)(zn); @@ -328,37 +312,35 @@ svuint32x4_t test_svuzp_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f32_x4)(zn); @@ -368,37 +350,35 @@ svfloat32x4_t test_svuzp_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_s64_x4)(zn); @@ -406,37 +386,35 @@ svint64x4_t test_svuzp_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_u64_x4)(zn); @@ -444,37 +422,35 @@ svuint64x4_t test_svuzp_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzp_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// 
CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzp_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store 
[[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzp,_f64_x4)(zn); @@ -484,37 +460,35 @@ svfloat64x4_t test_svuzp_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s8_x4)(zn); @@ -522,37 +496,35 @@ svint8x4_t test_svuzpq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svuzpq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u8_x4)(zn); @@ -560,37 +532,35 @@ svuint8x4_t test_svuzpq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// 
CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s16_x4)(zn); @@ -598,37 +568,35 @@ svint16x4_t test_svuzpq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], 
[[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u16_x4)(zn); @@ -636,37 +604,35 @@ svuint16x4_t test_svuzpq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f16_x4)(zn); @@ -674,37 +640,35 @@ svfloat16x4_t test_svuzpq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svuzpq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_bf16_x4)(zn); @@ -712,37 +676,35 @@ svbfloat16x4_t test_svuzpq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s32_x4)(zn); @@ -750,37 +712,35 @@ svint32x4_t test_svuzpq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } 
[[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u32_x4)(zn); @@ -788,37 +748,35 @@ svuint32x4_t test_svuzpq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) 
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f32_x4)(zn); @@ -826,37 +784,35 @@ svfloat32x4_t test_svuzpq_f32_x4(svfloat32x4_t zn) 
__arm_streaming { // CHECK-LABEL: @test_svuzpq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_s64_x4)(zn); @@ -864,37 +820,35 @@ svint64x4_t test_svuzpq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_u64_x4)(zn); @@ -902,37 +856,35 @@ svuint64x4_t test_svuzpq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svuzpq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.uzpq.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svuzpq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svuzpq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svuzpq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c index bc72cdb65e38b9..a29c347e3197f3 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx2.c @@ -19,25 +19,27 @@ // CHECK-LABEL: @test_svzip_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_s8_x2)(zn);
@@ -45,25 +47,27 @@ svint8x2_t test_svzip_s8_x2(svint8x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svzip_u8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x211svuint8x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[ZN]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.zip.x2.nxv16i8(<vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT: store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_u8_x2)(zn);
@@ -73,25 +77,27 @@ svuint8x2_t test_svzip_u8_x2(svuint8x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svzip_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.zip.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP6]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.zip.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x211svint16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.zip.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP6]]
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.zip.x2.nxv8i16(<vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT: store <vscale x 16 x i16> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
 //
 svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_s16_x2)(zn);
@@ -99,25 +105,27 @@ svint16x2_t test_svzip_s16_x2(svint16x2_t zn) __arm_streaming {
 // CHECK-LABEL: @test_svzip_u16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail
call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x2)(zn); @@ -125,25 +133,27 @@ svuint16x2_t test_svzip_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], 
[[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x2)(zn); @@ -151,25 +161,27 @@ svfloat16x2_t test_svzip_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x2)(zn); @@ -179,25 +191,27 @@ svbfloat16x2_t test_svzip_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x2)(zn); @@ -205,25 +219,27 @@ svint32x2_t test_svzip_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x2)(zn); @@ -231,25 +247,27 @@ svuint32x2_t test_svzip_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x2)(zn); @@ -259,25 +277,27 @@ svfloat32x2_t test_svzip_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svzip_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s64_x2)(zn); @@ -285,25 +305,27 @@ svint64x2_t test_svzip_s64_x2(svint64x2_t zn) 
__arm_streaming { // CHECK-LABEL: @test_svzip_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x2)(zn); @@ -311,25 +333,27 @@ svuint64x2_t test_svzip_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zip.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x2)(zn); @@ -339,25 +363,27 @@ svfloat64x2_t test_svzip_f64_x2(svfloat64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x210svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x2)(zn); @@ -365,25 +391,27 @@ svint8x2_t test_svzipq_s8_x2(svint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: 
@_Z17test_svzipq_u8_x211svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x2)(zn); @@ -391,25 +419,27 @@ svuint8x2_t test_svzipq_u8_x2(svuint8x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x211svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x2)(zn); @@ -417,25 +447,27 @@ svint16x2_t test_svzipq_s16_x2(svint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x212svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x2)(zn); @@ -443,25 +475,27 @@ svuint16x2_t test_svzipq_u16_x2(svuint16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x213svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// 
CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x2)(zn); @@ -469,25 +503,27 @@ svfloat16x2_t test_svzipq_f16_x2(svfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x214svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x2)(zn); @@ -495,25 +531,27 @@ svbfloat16x2_t test_svzipq_bf16_x2(svbfloat16x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x2)(zn); @@ -521,25 +559,27 @@ svint32x2_t test_svzipq_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) 
-// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x2)(zn); @@ -547,25 +587,27 @@ svuint32x2_t test_svzipq_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x2)(zn); @@ -573,25 +615,27 @@ svfloat32x2_t test_svzipq_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x211svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x2)(zn); @@ -599,25 +643,27 @@ svint64x2_t test_svzipq_s64_x2(svint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x212svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], 
[[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x2)(zn); @@ -625,25 +671,27 @@ svuint64x2_t test_svzipq_u64_x2(svuint64x2_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x213svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.zipq.x2.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svzipq_f64_x2(svfloat64x2_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x2)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c index e4ad0bbda97669..be40ecb4bcaa35 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_zipx4.c @@ -19,37 +19,35 @@ // CHECK-LABEL: @test_svzip_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svzip_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s8_x4)(zn); @@ -57,37 +55,35 @@ svint8x4_t test_svzip_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z16test_svzip_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } 
[[TMP9]] // svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u8_x4)(zn); @@ -97,37 +93,35 @@ svuint8x4_t test_svzip_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// 
CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s16_x4)(zn); @@ -135,37 +129,35 @@ svint16x4_t test_svzip_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u16_x4)(zn); @@ -173,37 +165,35 @@ svuint16x4_t test_svzip_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f16_x4)(zn); @@ -211,37 +201,35 @@ svfloat16x4_t test_svzip_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 
16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzip_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_bf16_x4)(zn); @@ -251,37 +239,35 @@ svbfloat16x4_t test_svzip_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_s32_x4)(zn); @@ -289,37 +275,35 @@ svint32x4_t test_svzip_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u32_x4)(zn); @@ -327,37 +311,35 @@ svuint32x4_t test_svzip_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = 
tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], 
ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f32_x4)(zn); @@ -367,37 +349,35 @@ svfloat32x4_t test_svzip_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.zip.x4.nxv2i64(<vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    store <vscale x 8 x i64> [[TMP8]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]]
 //
 svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming {
   return SVE_ACLE_FUNC(svzip,_s64_x4)(zn);
@@ -405,37 +385,35 @@ svint64x4_t test_svzip_s64_x4(svint64x4_t zn) __arm_streaming {
 
 // CHECK-LABEL: @test_svzip_u64_x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.zip.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
-// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.zip.x4.nxv2i64(<vscale x 2 x i64> [[ZN_COERCE0:%.*]], <vscale x 2 x i64> [[ZN_COERCE1:%.*]], <vscale x 2 x i64> [[ZN_COERCE2:%.*]], <vscale x 2 x i64> [[ZN_COERCE3:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] =
extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_u64_x4)(zn); @@ -443,37 +421,35 @@ svuint64x4_t test_svzip_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzip_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzip_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: 
[[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zip.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzip,_f64_x4)(zn); @@ -483,37 +459,35 @@ svfloat64x4_t test_svzip_f64_x4(svfloat64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], 
align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_s8_x410svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s8_x4)(zn); @@ -521,37 +495,35 @@ svint8x4_t test_svzipq_s8_x4(svint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z17test_svzipq_u8_x411svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv16i8( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u8_x4)(zn); @@ -559,37 +531,35 @@ svuint8x4_t test_svzipq_u8_x4(svuint8x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s16_x411svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( 
[[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s16_x4)(zn); @@ -597,37 +567,35 @@ svint16x4_t test_svzipq_s16_x4(svint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u16_x412svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8i16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u16_x4)(zn); @@ -635,37 +603,35 @@ svuint16x4_t test_svzipq_u16_x4(svuint16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f16_x413svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8f16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f16_x4)(zn); @@ -673,37 +639,35 @@ svfloat16x4_t test_svzipq_f16_x4(svfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_bf16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z19test_svzipq_bf16_x414svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv8bf16( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_bf16_x4)(zn); @@ -711,37 +675,35 @@ svbfloat16x4_t test_svzipq_bf16_x4(svbfloat16x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s32_x4( // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s32_x411svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } 
[[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s32_x4)(zn); @@ -749,37 +711,35 @@ svint32x4_t test_svzipq_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = 
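// NOTE (illustrative): the i64 offsets fed to @llvm.vector.insert scale with
// the per-part lane count — the offset for part I is I times the minimum
// number of elements of the single vector. Hence the byte tests step by 16
// (0/16/32/48 for nxv16i8), the halfword tests by 8 (0/8/16/24 for nxv8i16),
// and the word tests in this hunk by 4 (0/4/8/12 for nxv4i32).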
extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u32_x412svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u32_x4)(zn); @@ -787,37 +747,35 @@ svuint32x4_t test_svzipq_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f32_x413svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv4f32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f32_x4)(zn); @@ -825,37 +783,35 @@ svfloat32x4_t test_svzipq_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_s64_x411svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], 
i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_s64_x4)(zn); @@ -863,37 +819,35 @@ svint64x4_t test_svzipq_s64_x4(svint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_u64_x412svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2i64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_u64_x4)(zn); @@ -901,37 +855,35 @@ svuint64x4_t test_svzipq_u64_x4(svuint64x4_t zn) __arm_streaming { // CHECK-LABEL: @test_svzipq_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // // CPP-CHECK-LABEL: @_Z18test_svzipq_f64_x413svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: 
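// NOTE (illustrative): the f64 case being rewritten here is the last of the
// zip tests and follows the same struct-return pattern with a step of 2 lanes
// per nxv2f64 part. A hedged C-level sketch under the same assumptions as
// above (wrapper name hypothetical):
//
//   svfloat64x4_t zip_f64_quads(svfloat64x4_t zn) __arm_streaming {
//     return svzipq_f64_x4(zn);  // lowers to @llvm.aarch64.sve.zipq.x4.nxv2f64
//   }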
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.zipq.x4.nxv2f64( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], [[ZN_COERCE2:%.*]], [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svzipq_f64_x4(svfloat64x4_t zn) __arm_streaming { return SVE_ACLE_FUNC(svzipq,_f64_x4)(zn); diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c index e52301aab455bb..1bdca4a12bcbd9 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -18,16 +18,12 @@ // CHECK-LABEL: @test_svwrite_ver_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg2j11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { @@ -36,16 +32,12 @@ void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t 
val) __arm_streaming // CHECK-LABEL: @test_svwrite_ver_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg2j10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { @@ -54,16 +46,12 @@ void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_hor_za8_u8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg2j11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { @@ -72,16 +60,12 @@ void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_hor_za8_s8_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg2j10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail 
call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { @@ -90,20 +74,12 @@ void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_hor_za8_u8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg4j11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") { @@ -112,20 +88,12 @@ void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_hor_za8_s8_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg4j10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail 
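// NOTE (illustrative): for the SME2 write tests in acle_sme2_write.c the
// change is a pure simplification — the two or four tuple parts
// (VAL_COERCE0..N) feed the @llvm.aarch64.sme.write.{hor,ver}.vg{2,4}
// intrinsics directly, so every @llvm.vector.extract in the old checks
// disappears. Sketch, assuming the ACLE names these tests exercise (the
// wrapper name is hypothetical):
//
//   void write_hor_u8_quads(uint32_t base, svuint8x4_t val)
//       __arm_streaming __arm_inout("za") {
//     svwrite_hor_za8_u8_vg4(base, val);  // no tuple re-packing is emitted
//   }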
call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[VAL_COERCE0:%.*]], <vscale x 16 x i8> [[VAL_COERCE1:%.*]], <vscale x 16 x i8> [[VAL_COERCE2:%.*]], <vscale x 16 x i8> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") {
@@ -134,20 +102,12 @@ void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming
 // CHECK-LABEL: @test_svwrite_ver_za8_u8_vg4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[VAL_COERCE0:%.*]], <vscale x 16 x i8> [[VAL_COERCE1:%.*]], <vscale x 16 x i8> [[VAL_COERCE2:%.*]], <vscale x 16 x i8> [[VAL_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg4j11svuint8x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[VAL_COERCE0:%.*]], <vscale x 16 x i8> [[VAL_COERCE1:%.*]], <vscale x 16 x i8> [[VAL_COERCE2:%.*]], <vscale x 16 x i8> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") {
@@ -156,20 +116,12 @@ void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) __arm_streaming
 // CHECK-LABEL: @test_svwrite_ver_za8_s8_vg4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 48)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[VAL_COERCE0:%.*]], <vscale x 16 x i8> [[VAL_COERCE1:%.*]], <vscale x 16 x i8> [[VAL_COERCE2:%.*]], <vscale x 16 x i8> [[VAL_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg4j10svint8x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[VAL]], i64 48)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 [[BASE:%.*]], <vscale x 16 x i8> [[VAL_COERCE0:%.*]], <vscale x 16 x i8> [[VAL_COERCE1:%.*]], <vscale x 16 x i8> [[VAL_COERCE2:%.*]], <vscale x 16 x i8> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") {
@@ -178,16 +130,12 @@ void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) __arm_streaming
 // CHECK-LABEL: @test_svwrite_hor_za16_u16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg2j12svuint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") {
@@ -196,16 +144,12 @@ void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_stream
 // CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE0:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg2j14svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE0:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") {
@@ -214,16 +158,12 @@ void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_str
 // CHECK-LABEL: @test_svwrite_hor_za16_f16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[VAL_COERCE0:%.*]], <vscale x 8 x half> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg2j13svfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[VAL_COERCE0:%.*]], <vscale x 8 x half> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") {
@@ -232,16 +172,12 @@ void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_strea
 // CHECK-LABEL: @test_svwrite_hor_za16_s16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg2j11svint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_inout("za") {
@@ -250,16 +186,12 @@ void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streami
 // CHECK-LABEL: @test_svwrite_ver_za16_u16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg2j12svuint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") {
@@ -268,16 +200,12 @@ void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) __arm_stream
 // CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE0:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg2j14svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE0:%.*]], <vscale x 8 x bfloat> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") {
@@ -286,16 +214,12 @@ void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) __arm_str
 // CHECK-LABEL: @test_svwrite_ver_za16_f16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[VAL_COERCE0:%.*]], <vscale x 8 x half> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg2j13svfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x half> [[VAL_COERCE0:%.*]], <vscale x 8 x half> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") {
@@ -304,16 +228,12 @@ void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) __arm_strea
 // CHECK-LABEL: @test_svwrite_ver_za16_s16_vg2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg2j11svint16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streaming __arm_inout("za") {
@@ -322,20 +242,12 @@ void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) __arm_streami
 // CHECK-LABEL: @test_svwrite_hor_za16_u16_vg4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]], <vscale x 8 x i16> [[VAL_COERCE2:%.*]], <vscale x 8 x i16> [[VAL_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg4j12svuint16x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]], <vscale x 8 x i16> [[VAL_COERCE2:%.*]], <vscale x 8 x i16> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") {
@@ -344,20 +256,12 @@ void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_stream
 // CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], <vscale x 8 x bfloat>
[[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg4j14svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -366,20 +270,12 @@ void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_str // CHECK-LABEL: @test_svwrite_hor_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg4j13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -388,20 +284,12 @@ void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_hor_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg4j11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") { @@ -410,20 +298,12 @@ void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_ver_za16_u16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg4j12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") { @@ -432,20 +312,12 @@ void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) __arm_stream // CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg4j14svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -454,20 +326,12 @@ void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) __arm_str // CHECK-LABEL: @test_svwrite_ver_za16_f16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg4j13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -476,20 +340,12 @@ void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_ver_za16_s16_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = 
tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg4j11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") { @@ -498,16 +354,12 @@ void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_hor_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg2j12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { @@ -516,16 +368,12 @@ void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_stream // CHECK-LABEL: @test_svwrite_hor_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg2j13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { @@ -534,16 +382,12 @@ void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_strea // CHECK-LABEL: @test_svwrite_hor_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg2j11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_inout("za") { @@ -552,16 +396,12 @@ void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_ver_za32_u32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg2j12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { @@ -570,16 +410,12 @@ void 
test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) __arm_stream // CHECK-LABEL: @test_svwrite_ver_za32_f32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg2j13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { @@ -588,16 +424,12 @@ void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) __arm_strea // CHECK-LABEL: @test_svwrite_ver_za32_s32_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg2j11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streaming __arm_inout("za") { @@ -606,20 +438,12 @@ void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_hor_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: 
ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg4j12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { @@ -628,20 +452,12 @@ void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_stream // CHECK-LABEL: @test_svwrite_hor_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg4j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { @@ -650,20 +466,12 @@ void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_hor_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: 
tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg4j11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { @@ -672,20 +480,12 @@ void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_ver_za32_u32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg4j12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { @@ -694,20 +494,12 @@ void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) __arm_stream // CHECK-LABEL: @test_svwrite_ver_za32_f32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg4j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { @@ -716,20 +508,12 @@ void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_ver_za32_s32_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg4j11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 3, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { @@ -738,16 +522,12 @@ void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_hor_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg2j12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_inout("za") { @@ -756,16 +536,12 @@ void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_stream // CHECK-LABEL: @test_svwrite_hor_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg2j13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { @@ -774,16 +550,12 @@ void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_strea // CHECK-LABEL: @test_svwrite_hor_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg2j11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], 
[[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { @@ -792,16 +564,12 @@ void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_ver_za64_u64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg2j12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_streaming __arm_inout("za") { @@ -810,16 +578,12 @@ void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) __arm_stream // CHECK-LABEL: @test_svwrite_ver_za64_f64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg2j13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { @@ -828,16 +592,12 @@ void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) __arm_strea // CHECK-LABEL: @test_svwrite_ver_za64_s64_vg2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z29test_svwrite_ver_za64_s64_vg2j11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { @@ -846,20 +606,12 @@ void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_hor_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg4j12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { @@ -868,20 +620,12 @@ void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_stream // CHECK-LABEL: @test_svwrite_ver_za64_u64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg4j12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { @@ -890,20 +634,12 @@ void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) __arm_stream // CHECK-LABEL: @test_svwrite_hor_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg4j13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { @@ -912,20 +648,12 @@ void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_hor_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], 
[[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg4j11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { @@ -934,20 +662,12 @@ void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_ver_za64_f64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg4j13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { @@ -956,20 +676,12 @@ void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_ver_za64_s64_vg4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) 
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_s64_vg4j11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 7, i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { @@ -978,16 +690,12 @@ void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za8_s8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svwrite_za8_s8_vg1x2j10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __arm_inout("za") { @@ -996,16 +704,12 @@ void test_svwrite_za8_s8_vg1x2(uint32_t base, svint8x2_t val) __arm_streaming __ // CHECK-LABEL: @test_svwrite_za8_u8_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svwrite_za8_u8_vg1x2j11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.vg1x2.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming __arm_inout("za") { @@ -1014,16 +718,12 @@ void test_svwrite_za8_u8_vg1x2(uint32_t base, svuint8x2_t val) __arm_streaming _ // CHECK-LABEL: @test_svwrite_za16_s16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_s16_vg1x2j11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) __arm_streaming __arm_inout("za") { @@ -1032,16 +732,12 @@ void test_svwrite_za16_s16_vg1x2(uint32_t base, svint16x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_za16_u16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_u16_vg1x2j12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8i16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streaming __arm_inout("za") { @@ -1050,16 +746,12 @@ void test_svwrite_za16_u16_vg1x2(uint32_t base, svuint16x2_t val) __arm_streamin // CHECK-LABEL: @test_svwrite_za16_bf16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svwrite_za16_bf16_vg1x2j14svbfloat16x2_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8bf16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_streaming __arm_inout("za") { @@ -1068,16 +760,12 @@ void test_svwrite_za16_bf16_vg1x2(uint32_t base, svbfloat16x2_t val) __arm_strea // CHECK-LABEL: @test_svwrite_za16_f16_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_f16_vg1x2j13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv8f16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streaming __arm_inout("za") { @@ -1086,16 +774,12 @@ void test_svwrite_za16_f16_vg1x2(uint32_t base, svfloat16x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za32_s32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_s32_vg1x2j11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_s32_vg1x2(uint32_t base, svint32x2_t val) __arm_streaming __arm_inout("za") { @@ -1104,16 +788,12 @@ void test_svwrite_za32_s32_vg1x2(uint32_t base, svint32x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_za32_u32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_u32_vg1x2j12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streaming __arm_inout("za") { @@ -1122,16 +802,12 @@ void test_svwrite_za32_u32_vg1x2(uint32_t base, svuint32x2_t val) __arm_streamin // CHECK-LABEL: @test_svwrite_za32_f32_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_f32_vg1x2j13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv4f32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streaming __arm_inout("za") { @@ -1140,16 +816,12 @@ void test_svwrite_za32_f32_vg1x2(uint32_t base, svfloat32x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za64_u64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x2j12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_u64_vg1x2(uint32_t base, 
svuint64x2_t val) __arm_streaming __arm_inout("za") { @@ -1158,16 +830,12 @@ void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) __arm_streamin // CHECK-LABEL: @test_svwrite_za64_f64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x2j13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streaming __arm_inout("za") { @@ -1176,16 +844,12 @@ void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za64_s64_vg1x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x2j11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming __arm_inout("za") { @@ -1194,20 +858,12 @@ void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_za8_s8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // 
CPP-CHECK-LABEL: @_Z25test_svwrite_za8_s8_vg1x4j10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __arm_inout("za") { @@ -1216,20 +872,12 @@ void test_svwrite_za8_s8_vg1x4(uint32_t base, svint8x4_t val) __arm_streaming __ // CHECK-LABEL: @test_svwrite_za8_u8_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svwrite_za8_u8_vg1x4j11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv16i8(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming __arm_inout("za") { @@ -1238,20 +886,12 @@ void test_svwrite_za8_u8_vg1x4(uint32_t base, svuint8x4_t val) __arm_streaming _ // CHECK-LABEL: @test_svwrite_za16_s16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], 
[[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_s16_vg1x4j11svint16x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]], <vscale x 8 x i16> [[VAL_COERCE2:%.*]], <vscale x 8 x i16> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_za16_s16_vg1x4(uint32_t base, svint16x4_t val) __arm_streaming __arm_inout("za") {
@@ -1260,20 +900,12 @@ void test_svwrite_za16_s16_vg1x4(uint32_t base, svint16x4_t val) __arm_streaming
 // CHECK-LABEL: @test_svwrite_za16_u16_vg1x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]], <vscale x 8 x i16> [[VAL_COERCE2:%.*]], <vscale x 8 x i16> [[VAL_COERCE3:%.*]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_u16_vg1x4j12svuint16x4_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[VAL]], i64 24)
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8i16(i32 [[BASE:%.*]], <vscale x 8 x i16> [[VAL_COERCE0:%.*]], <vscale x 8 x i16> [[VAL_COERCE1:%.*]], <vscale x 8 x i16> [[VAL_COERCE2:%.*]], <vscale x 8 x i16> [[VAL_COERCE3:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streaming __arm_inout("za") {
@@ -1282,20 +914,12 @@ void test_svwrite_za16_u16_vg1x4(uint32_t base, svuint16x4_t val) __arm_streamin
 // CHECK-LABEL: @test_svwrite_za16_bf16_vg1x4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[VAL]], i64 24)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
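The za-array (vg1) forms above show the same simplification for intrinsics that take only a slice base and no tile operand: each CHECK now expects the coerced tuple fields to feed @llvm.aarch64.sme.write.vg1x4.* directly. A short sketch under the same assumptions as before (hypothetical wrapper name, ACLE names from arm_sme.h):

#include <arm_sme.h>

// Hypothetical reproducer for the vg1x4 za-array write: the four bfloat16
// parts of the tuple arrive pre-split as coerced arguments, so codegen
// forwards them straight to @llvm.aarch64.sme.write.vg1x4.nxv8bf16.
void write_vg1x4(uint32_t base, svbfloat16x4_t val)
    __arm_streaming __arm_inout("za") {
  svwrite_za16_bf16_vg1x4(base, val);
}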
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svwrite_za16_bf16_vg1x4j14svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8bf16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -1304,20 +928,12 @@ void test_svwrite_za16_bf16_vg1x4(uint32_t base, svbfloat16x4_t val) __arm_strea // CHECK-LABEL: @test_svwrite_za16_f16_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za16_f16_vg1x4j13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv8f16(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streaming __arm_inout("za") { @@ -1326,20 +942,12 @@ void test_svwrite_za16_f16_vg1x4(uint32_t base, svfloat16x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za32_s32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// 
CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_s32_vg1x4j11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming __arm_inout("za") { @@ -1348,20 +956,12 @@ void test_svwrite_za32_s32_vg1x4(uint32_t base, svint32x4_t val) __arm_streaming // CHECK-LABEL: @test_svwrite_za32_u32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_u32_vg1x4j12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4i32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streaming __arm_inout("za") { @@ -1370,20 +970,12 @@ void test_svwrite_za32_u32_vg1x4(uint32_t base, svuint32x4_t val) __arm_streamin // CHECK-LABEL: @test_svwrite_za32_f32_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// 
CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za32_f32_vg1x4j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv4f32(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streaming __arm_inout("za") { @@ -1392,20 +984,12 @@ void test_svwrite_za32_f32_vg1x4(uint32_t base, svfloat32x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za64_u64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x4j12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streaming __arm_inout("za") { @@ -1414,20 +998,12 @@ void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) __arm_streamin // CHECK-LABEL: @test_svwrite_za64_f64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// 
CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x4j13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streaming __arm_inout("za") { @@ -1436,20 +1012,12 @@ void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) __arm_streami // CHECK-LABEL: @test_svwrite_za64_s64_vg1x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x4j11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[BASE:%.*]], [[VAL_COERCE0:%.*]], [[VAL_COERCE1:%.*]], [[VAL_COERCE2:%.*]], [[VAL_COERCE3:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c index 466ca130326335..77b02b4c4708fa 100644 --- a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c +++ 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c
@@ -8,150 +8,186 @@
 //
 // X2- hor
-// CHECK-LABEL: define dso_local <vscale x 32 x i8> @test_svreadz_hor_za8_s8_x2(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_hor_za8_s8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i8> @_Z26test_svreadz_hor_za8_s8_x2j(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z26test_svreadz_hor_za8_s8_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 svint8x2_t test_svreadz_hor_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za")
 {
    return svreadz_hor_za8_s8_vg2(0, slice);
 }
 
-// CHECK-LABEL: define dso_local <vscale x 32 x i8> @test_svreadz_hor_za8_u8_x2(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_hor_za8_u8_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i8> @_Z26test_svreadz_hor_za8_u8_x2j(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z26test_svreadz_hor_za8_u8_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.horiz.x2.nxv16i8(i32 0, i32 [[SLICE]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8>
poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint8x2_t test_svreadz_hor_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_hor_za8_u8_vg2(0, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_s16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_s16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_hor_za16_s16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svreadz_hor_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_hor_za16_s16_vg2(0, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_u16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_hor_za16_u16_x2j( +// CPP-CHECK-LABEL: define 
dso_local { , } @_Z28test_svreadz_hor_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8i16(i32 1, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svreadz_hor_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_hor_za16_u16_vg2(1, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_f16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_hor_za16_f16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8f16(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svreadz_hor_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_hor_za16_f16_vg2(0, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_hor_za16_bf16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 
0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z29test_svreadz_hor_za16_bf16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z29test_svreadz_hor_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv8bf16(i32 1, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -159,150 +195,186 @@ svbfloat16x2_t test_svreadz_hor_za16_bf16_x2(uint32_t slice) __arm_streaming __a } -// CHECK-LABEL: define dso_local @test_svreadz_hor_za32_s32_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_hor_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_hor_za32_s32_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z28test_svreadz_hor_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svreadz_hor_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_hor_za32_s32_vg2(0, slice); } -// 
CHECK-LABEL: define dso_local <vscale x 8 x i32> @test_svreadz_hor_za32_u32_x2(
+// CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_svreadz_hor_za32_u32_x2(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i32> @_Z28test_svreadz_hor_za32_u32_x2j(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 4 x i32>, <vscale x 4 x i32> } @_Z28test_svreadz_hor_za32_u32_x2j(
 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.readz.horiz.x2.nxv4i32(i32 2, i32 [[SLICE]])
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 8 x i32> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
 svuint32x2_t test_svreadz_hor_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za32_u32_vg2(2, slice);
 }

[Every remaining CHECK/CPP-CHECK block in this file receives the identical mechanical update shown above, instantiated for the test's element type and tuple size: the wide single-vector return type becomes a struct of scalable vectors, a "[[RETVAL:%.*]] = alloca { ... }, align 16" line is added after "entry:", and the final "ret" of the wide vector is replaced by a store of that vector to [[RETVAL]], a load of the struct from [[RETVAL]], and a ret of the struct. The x4 variants use a four-element struct and four extractvalue/vector.insert steps (ending in [[TMP8]]/[[TMP9]]). The functions under test, in file order:]

 svfloat32x2_t test_svreadz_hor_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za32_f32_vg2(3, slice);
 }

 svint64x2_t test_svreadz_hor_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za64_s64_vg2(0, slice);
 }

 svuint64x2_t test_svreadz_hor_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za64_u64_vg2(4, slice);
 }

@@ -314,150 +386,186 @@ svfloat64x2_t test_svreadz_hor_za64_f64_x2(uint32_t slice) __arm_streaming __arm

 //
 // X2 - ver
 //

 svint8x2_t test_svreadz_ver_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za8_s8_vg2(0, slice);
 }

 svuint8x2_t test_svreadz_ver_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za8_u8_vg2(0, slice);
 }

 svint16x2_t test_svreadz_ver_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za16_s16_vg2(0, slice);
 }

 svuint16x2_t test_svreadz_ver_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za16_u16_vg2(1, slice);
 }

 svfloat16x2_t test_svreadz_ver_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za16_f16_vg2(0, slice);
 }

 svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -465,150 +573,186 @@ svbfloat16x2_t test_svreadz_ver_za16_bf16_x2(uint32_t slice) __arm_streaming __a
 }

 svint32x2_t test_svreadz_ver_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za32_s32_vg2(0, slice);
 }

 svuint32x2_t test_svreadz_ver_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za32_u32_vg2(2, slice);
 }

 svfloat32x2_t test_svreadz_ver_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za32_f32_vg2(3, slice);
 }

 svint64x2_t test_svreadz_ver_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za64_s64_vg2(0, slice);
 }

 svuint64x2_t test_svreadz_ver_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za64_u64_vg2(4, slice);
 }

 svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -618,9 +762,10 @@ svfloat64x2_t test_svreadz_ver_za64_f64_x2(uint32_t slice) __arm_streaming __arm
 }

 //
 // X4 - hor

 svint8x4_t test_svreadz_hor_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za8_s8_vg4(0, slice);
 }

 svuint8x4_t test_svreadz_hor_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za8_u8_vg4(0, slice);
 }

 svint16x4_t test_svreadz_hor_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za16_s16_vg4(0, slice);
 }

 svuint16x4_t test_svreadz_hor_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za16_u16_vg4(1, slice);
 }

 svfloat16x4_t test_svreadz_hor_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za16_f16_vg4(0, slice);
 }

 svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -817,9 +997,10 @@ svbfloat16x4_t test_svreadz_hor_za16_bf16_x4(uint32_t slice) __arm_streaming __a
 }

 svint32x4_t test_svreadz_hor_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za32_s32_vg4(0, slice);
 }

 svuint32x4_t test_svreadz_hor_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za32_u32_vg4(2, slice);
 }

 svfloat32x4_t test_svreadz_hor_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za32_f32_vg4(3, slice);
 }

 svint64x4_t test_svreadz_hor_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za64_s64_vg4(0, slice);
 }

 svuint64x4_t test_svreadz_hor_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_hor_za64_u64_vg4(4, slice);
 }

 svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
@@ -1017,9 +1233,10 @@ svfloat64x4_t test_svreadz_hor_za64_f64_x4(uint32_t slice) __arm_streaming __arm
 }

 //
 // X4 - ver

 svint8x4_t test_svreadz_ver_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za8_s8_vg4(0, slice);
 }

 svuint8x4_t test_svreadz_ver_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za8_u8_vg4(0, slice);
 }

 svint16x4_t test_svreadz_ver_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za16_s16_vg4(0, slice);
 }

 svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") {
   return svreadz_ver_za16_u16_vg4(1, slice);
 }

-// CHECK-LABEL: define dso_local <vscale x 32 x half> @test_svreadz_ver_za16_f16_x4(
+// CHECK-LABEL: define dso_local { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_svreadz_ver_za16_f16_x4(
 // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }, align 16
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
 // CHECK-NEXT:
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -1161,11 +1402,14 @@ svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za16_f16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8f16(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -1175,16 +1419,19 @@ svuint16x4_t test_svreadz_ver_za16_u16_x4(uint32_t slice) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za16_f16_vg4(0, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za16_bf16_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -1194,11 +1441,14 @@ svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z29test_svreadz_ver_za16_bf16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z29test_svreadz_ver_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sme.readz.vert.x4.nxv8bf16(i32 1, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -1208,7 +1458,9 @@ svfloat16x4_t test_svreadz_ver_za16_f16_x4(uint32_t slice) __arm_streaming __arm // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1216,9 +1468,10 @@ svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __a } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za32_s32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -1228,11 +1481,14 @@ svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za32_s32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -1242,16 +1498,19 @@ svbfloat16x4_t test_svreadz_ver_za16_bf16_x4(uint32_t slice) __arm_streaming __a // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za32_s32_vg4(0, slice); } -// 
CHECK-LABEL: define dso_local @test_svreadz_ver_za32_u32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -1261,11 +1520,14 @@ svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za32_u32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4i32(i32 2, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -1275,16 +1537,19 @@ svint32x4_t test_svreadz_ver_za32_s32_x4(uint32_t slice) __arm_streaming __arm_i // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za32_u32_vg4(2, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za32_f32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) @@ -1294,11 +1559,14 @@ svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, 
ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za32_f32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv4f32(i32 3, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) @@ -1308,16 +1576,19 @@ svuint32x4_t test_svreadz_ver_za32_u32_x4(uint32_t slice) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za32_f32_vg4(3, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za64_s64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1327,11 +1598,14 @@ svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za64_s64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1341,16 +1615,19 @@ svfloat32x4_t test_svreadz_ver_za32_f32_x4(uint32_t slice) __arm_streaming __arm // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// 
CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za64_s64_vg4(0, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za64_u64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1360,11 +1637,14 @@ svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za64_u64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2i64(i32 4, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -1374,16 +1654,19 @@ svint64x4_t test_svreadz_ver_za64_s64_x4(uint32_t slice) __arm_streaming __arm_i // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_ver_za64_u64_vg4(4, slice); } -// CHECK-LABEL: define dso_local @test_svreadz_ver_za64_f64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_ver_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1393,11 +1676,14 @@ svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z28test_svreadz_ver_za64_f64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z28test_svreadz_ver_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.vert.x4.nxv2f64(i32 7, i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -1407,7 +1693,9 @@ svuint64x4_t test_svreadz_ver_za64_u64_x4(uint32_t slice) __arm_streaming __arm_ // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svreadz_ver_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -1415,13 +1703,13 @@ svfloat64x4_t test_svreadz_ver_za64_f64_x4(uint32_t slice) __arm_streaming __arm } // CHECK-LABEL: define dso_local @test_svreadz_hor_za8_s8( -// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) // CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: define dso_local @_Z23test_svreadz_hor_za8_s8j( -// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readz.horiz.nxv16i8(i32 0, i32 [[SLICE]]) // CPP-CHECK-NEXT: ret [[TMP0]] @@ -1829,300 +2117,372 @@ svfloat64_t test_svreadz_hor_za128_f64(uint32_t slice) __arm_streaming __arm_ino // X2 // -// CHECK-LABEL: define dso_local @test_svreadz_za8_s8_x2( -// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LABEL: define dso_local { , } @test_svreadz_za8_s8_x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// 
CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i8> @_Z22test_svreadz_za8_s8_x2j(
-// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z22test_svreadz_za8_s8_x2j(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
svint8x2_t test_svreadz_za8_s8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
  return svreadz_za8_s8_vg1x2(slice);
}
-// CHECK-LABEL: define dso_local <vscale x 32 x i8> @test_svreadz_za8_u8_x2(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_svreadz_za8_u8_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i8> @_Z22test_svreadz_za8_u8_x2j(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8> } @_Z22test_svreadz_za8_u8_x2j(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.readz.x2.nxv16i8(i32 [[SLICE]])
// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 32 x i8> [[TMP4]], ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[RETVAL]], align 16
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
//
svuint8x2_t test_svreadz_za8_u8_x2(uint32_t slice) __arm_streaming __arm_inout("za") {
  return svreadz_za8_u8_vg1x2(slice);
}
-// CHECK-LABEL: define dso_local <vscale x 16 x i16> @test_svreadz_za16_s16_x2(
+// CHECK-LABEL: define dso_local { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_svreadz_za16_s16_x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
// CHECK-NEXT:    [[TMP2:%.*]] =
tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_s16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_s16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint16x2_t test_svreadz_za16_s16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_s16_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_u16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za16_u16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_u16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za16_u16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8i16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint16x2_t test_svreadz_za16_u16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_u16_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_s32_x2( +// CHECK-LABEL: define dso_local 
{ , } @test_svreadz_za32_s32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_s32_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_s32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint32x2_t test_svreadz_za32_s32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_s32_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_u32_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za32_u32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_u32_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_u32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4i32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], 
align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint32x2_t test_svreadz_za32_u32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_u32_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_s64_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za64_s64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_s64_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_s64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svint64x2_t test_svreadz_za64_s64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za64_s64_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_u64_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za64_u64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_u64_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_u64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2i64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svuint64x2_t test_svreadz_za64_u64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za64_u64_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_bf16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za16_bf16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_za16_bf16_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z25test_svreadz_za16_bf16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8bf16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svbfloat16x2_t test_svreadz_za16_bf16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_bf16_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_f16_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za16_f16_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_f16_x2j( +// CPP-CHECK-LABEL: define 
dso_local { , } @_Z24test_svreadz_za16_f16_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv8f16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat16x2_t test_svreadz_za16_f16_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_f16_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_f32_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za32_f32_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_f32_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za32_f32_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv4f32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat32x2_t test_svreadz_za32_f32_x2(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_f32_vg1x2(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_f64_x2( +// CHECK-LABEL: define dso_local { , } @test_svreadz_za64_f64_x2( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_f64_x2j( +// CPP-CHECK-LABEL: define dso_local { , } @_Z24test_svreadz_za64_f64_x2j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.readz.x2.nxv2f64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_inout("za") { @@ -2133,9 +2493,10 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // X4 // -// CHECK-LABEL: define dso_local @test_svreadz_za8_s8_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_s8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -2145,11 +2506,14 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svreadz_za8_s8_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_s8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -2159,16 +2523,19 @@ svfloat64x2_t test_svreadz_za64_f64_x2(uint32_t slice) __arm_streaming __arm_ino // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr 
[[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za8_s8_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za8_u8_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za8_u8_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -2178,11 +2545,14 @@ svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("z // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svreadz_za8_u8_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z22test_svreadz_za8_u8_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv16i8(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) @@ -2192,16 +2562,19 @@ svint8x4_t test_svreadz_za8_s8_x4(uint32_t slice) __arm_streaming __arm_inout("z // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za8_u8_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_s16_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_s16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -2211,11 +2584,14 @@ svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout(" // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_s16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_s16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -2225,16 +2601,19 @@ svuint8x4_t test_svreadz_za8_u8_x4(uint32_t slice) __arm_streaming __arm_inout(" // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_s16_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_u16_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_u16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -2244,11 +2623,14 @@ svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_u16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_u16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8i16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) @@ -2258,16 +2640,19 @@ svint16x4_t test_svreadz_za16_s16_x4(uint32_t slice) __arm_streaming __arm_inout // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_u16_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_s32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_s32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -2277,11 +2662,14 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_s32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_s32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -2291,16 +2679,19 @@ svuint16x4_t test_svreadz_za16_u16_x4(uint32_t slice) __arm_streaming __arm_inou // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_s32_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_u32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_u32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -2310,11 +2701,14 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout // 
CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_u32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_u32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4i32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) @@ -2324,16 +2718,19 @@ svint32x4_t test_svreadz_za32_s32_x4(uint32_t slice) __arm_streaming __arm_inout // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_u32_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_s64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_s64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -2343,11 +2740,14 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_s64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_s64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -2357,16 +2757,19 @@ svuint32x4_t test_svreadz_za32_u32_x4(uint32_t 
slice) __arm_streaming __arm_inou // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za64_s64_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_u64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_u64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -2376,11 +2779,14 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_u64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_u64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2i64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) @@ -2390,16 +2796,19 @@ svint64x4_t test_svreadz_za64_s64_x4(uint32_t slice) __arm_streaming __arm_inout // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za64_u64_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_bf16_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_bf16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -2409,11 +2818,14 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z25test_svreadz_za16_bf16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z25test_svreadz_za16_bf16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8bf16(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP1]], i64 0) @@ -2423,16 +2835,19 @@ svuint64x4_t test_svreadz_za64_u64_x4(uint32_t slice) __arm_streaming __arm_inou // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_bf16_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za16_f16_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za16_f16_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -2442,11 +2857,14 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za16_f16_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za16_f16_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv8f16(i32 [[SLICE]]) // CPP-CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) @@ -2456,16 +2874,19 @@ svbfloat16x4_t test_svreadz_za16_bf16_x4(uint32_t slice) __arm_streaming __arm_i // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za16_f16_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za32_f32_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za32_f32_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) @@ -2475,11 +2896,14 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za32_f32_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za32_f32_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv4f32(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) @@ -2489,16 +2913,19 @@ svfloat16x4_t test_svreadz_za16_f16_x4(uint32_t slice) __arm_streaming __arm_ino // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_inout("za") { return svreadz_za32_f32_vg1x4(slice); } -// CHECK-LABEL: define dso_local @test_svreadz_za64_f64_x4( +// CHECK-LABEL: define dso_local { , , , } @test_svreadz_za64_f64_x4( // CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// 
CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -2508,11 +2935,14 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret { , , , } [[TMP9]] // -// CPP-CHECK-LABEL: define dso_local @_Z24test_svreadz_za64_f64_x4j( +// CPP-CHECK-LABEL: define dso_local { , , , } @_Z24test_svreadz_za64_f64_x4j( // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , , , }, align 16 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.readz.x4.nxv2f64(i32 [[SLICE]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) @@ -2522,7 +2952,9 @@ svfloat32x4_t test_svreadz_za32_f32_x4(uint32_t slice) __arm_streaming __arm_ino // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: store [[TMP8]], ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = load { , , , }, ptr [[RETVAL]], align 16 +// CPP-CHECK-NEXT: ret { , , , } [[TMP9]] // svfloat64x4_t test_svreadz_za64_f64_x4(uint32_t slice) __arm_streaming __arm_inout("za") { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c index abcb48fb8fa1a0..4691172b14a69f 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c @@ -24,15 +24,15 @@ // CHECK-LABEL: @test_svcreate2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svcreate2_bf16u14__SVBfloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { 
, } [[TMP1]] // svbfloat16x2_t test_svcreate2_bf16(svbfloat16_t x0, svbfloat16_t x1) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c index 2338f80fd08966..275908eb819c94 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c @@ -24,15 +24,15 @@ // CHECK-LABEL: @test_svcreate2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svcreate2_s8u10__SVInt8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR { @@ -41,15 +41,15 @@ svint8x2_t test_svcreate2_s8(svint8_t x0, svint8_t x1) ATTR // CHECK-LABEL: @test_svcreate2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_s16u11__SVInt16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR { @@ -58,15 +58,15 @@ svint16x2_t test_svcreate2_s16(svint16_t x0, svint16_t x1) ATTR // CHECK-LABEL: @test_svcreate2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_s32u11__SVInt32_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// 
CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR { @@ -75,15 +75,15 @@ svint32x2_t test_svcreate2_s32(svint32_t x0, svint32_t x1) ATTR // CHECK-LABEL: @test_svcreate2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_s64u11__SVInt64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR { @@ -92,15 +92,15 @@ svint64x2_t test_svcreate2_s64(svint64_t x0, svint64_t x1) ATTR // CHECK-LABEL: @test_svcreate2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svcreate2_u8u11__SVUint8_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR { @@ -109,15 +109,15 @@ svuint8x2_t test_svcreate2_u8(svuint8_t x0, svuint8_t x1) ATTR // CHECK-LABEL: @test_svcreate2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_u16u12__SVUint16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR { @@ -126,15 +126,15 @@ svuint16x2_t test_svcreate2_u16(svuint16_t x0, svuint16_t x1) ATTR // CHECK-LABEL: @test_svcreate2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_u32u12__SVUint32_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR { @@ -143,15 +143,15 @@ svuint32x2_t test_svcreate2_u32(svuint32_t x0, svuint32_t x1) ATTR // CHECK-LABEL: @test_svcreate2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_u64u12__SVUint64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR { @@ -160,15 +160,15 @@ svuint64x2_t test_svcreate2_u64(svuint64_t x0, svuint64_t x1) ATTR // CHECK-LABEL: @test_svcreate2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_f16u13__SVFloat16_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR { @@ -177,15 +177,15 @@ svfloat16x2_t test_svcreate2_f16(svfloat16_t x0, svfloat16_t x1) ATTR // CHECK-LABEL: @test_svcreate2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_f32u13__SVFloat32_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR { @@ -194,15 +194,15 @@ svfloat32x2_t test_svcreate2_f32(svfloat32_t x0, svfloat32_t x1) ATTR // CHECK-LABEL: @test_svcreate2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svcreate2_f64u13__SVFloat64_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat64x2_t test_svcreate2_f64(svfloat64_t x0, svfloat64_t x1) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c index 86533e58f56178..3e2bd259e5c7c1 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c @@ -24,17 +24,17 @@ // CHECK-LABEL: @test_svcreate3_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = 
insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svcreate3_bf16u14__SVBfloat16_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svbfloat16x3_t test_svcreate3_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c index aeff07104c189f..1395d92cc06ce0 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c @@ -24,17 +24,17 @@ // CHECK-LABEL: @test_svcreate3_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z17test_svcreate3_s8u10__SVInt8_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) ATTR { @@ -43,17 +43,17 @@ svint8x3_t test_svcreate3_s8(svint8_t x0, svint8_t x1, svint8_t x2) ATTR // CHECK-LABEL: @test_svcreate3_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: 
@_Z18test_svcreate3_s16u11__SVInt16_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) ATTR { @@ -62,17 +62,17 @@ svint16x3_t test_svcreate3_s16(svint16_t x0, svint16_t x1, svint16_t x2) ATTR // CHECK-LABEL: @test_svcreate3_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_s32u11__SVInt32_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) ATTR { @@ -81,17 +81,17 @@ svint32x3_t test_svcreate3_s32(svint32_t x0, svint32_t x1, svint32_t x2) ATTR // CHECK-LABEL: @test_svcreate3_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_s64u11__SVInt64_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], 
[[X2:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) ATTR { @@ -100,17 +100,17 @@ svint64x3_t test_svcreate3_s64(svint64_t x0, svint64_t x1, svint64_t x2) ATTR // CHECK-LABEL: @test_svcreate3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z17test_svcreate3_u8u11__SVUint8_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP1]], [[X2:%.*]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) ATTR { @@ -119,17 +119,17 @@ svuint8x3_t test_svcreate3_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2) ATTR // CHECK-LABEL: @test_svcreate3_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_u16u12__SVUint16_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP1]], [[X2:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint16x3_t test_svcreate3_u16(svuint16_t x0, 
svuint16_t x1, svuint16_t x2) ATTR { @@ -138,17 +138,17 @@ svuint16x3_t test_svcreate3_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2) ATT // CHECK-LABEL: @test_svcreate3_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_u32u12__SVUint32_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP0]], [[X1:%.*]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP1]], [[X2:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) ATTR { @@ -157,17 +157,17 @@ svuint32x3_t test_svcreate3_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2) ATT // CHECK-LABEL: @test_svcreate3_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_u64u12__SVUint64_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP1]], [[X2:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) ATTR { @@ -176,17 +176,17 @@ svuint64x3_t test_svcreate3_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2) ATT // CHECK-LABEL: @test_svcreate3_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.insert.nxv24f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP1]], [[X2:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_f16u13__SVFloat16_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP1]], [[X2:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) ATTR { @@ -195,17 +195,17 @@ svfloat16x3_t test_svcreate3_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2) // CHECK-LABEL: @test_svcreate3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP1]], [[X2:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_f32u13__SVFloat32_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP0]], [[X1:%.*]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP1]], [[X2:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) ATTR { @@ -214,17 +214,17 @@ svfloat32x3_t test_svcreate3_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2) // CHECK-LABEL: @test_svcreate3_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP1]], [[X2:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z18test_svcreate3_f64u13__SVFloat64_tS_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP0]], [[X1:%.*]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP1]], [[X2:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat64x3_t test_svcreate3_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c index 3067ae4875719c..e821c7501d7a9b 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c @@ -24,19 +24,19 @@ // CHECK-LABEL: @test_svcreate4_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[X4:%.*]], i64 24) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z19test_svcreate4_bf16u14__SVBfloat16_tS_S_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP0]], [[X1:%.*]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP1]], [[X2:%.*]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP2]], [[X4:%.*]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svbfloat16x4_t test_svcreate4_bf16(svbfloat16_t x0, svbfloat16_t x1, svbfloat16_t x2, svbfloat16_t x4) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c index b2781e97f7ec2e..97672e2833bebf 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c 
@@ -24,19 +24,19 @@
 // CHECK-LABEL: @test_svcreate4_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], i64 48)
-// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svcreate4_s8u10__SVInt8_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], i64 48)
-// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4) ATTR {
@@ -45,19 +45,19 @@ svint8x4_t test_svcreate4_s8(svint8_t x0, svint8_t x1, svint8_t x2, svint8_t x4)
 // CHECK-LABEL: @test_svcreate4_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_s16u11__SVInt16_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]]
 //
 svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16_t x4) ATTR {
@@ -66,19 +66,19 @@ svint16x4_t test_svcreate4_s16(svint16_t x0, svint16_t x1, svint16_t x2, svint16
 // CHECK-LABEL: @test_svcreate4_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_s32u11__SVInt32_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]]
 //
 svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x4) ATTR {
@@ -87,19 +87,19 @@ svint32x4_t test_svcreate4_s32(svint32_t x0, svint32_t x1, svint32_t x2, svint32
 // CHECK-LABEL: @test_svcreate4_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_s64u11__SVInt64_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]]
 //
 svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64_t x4) ATTR {
@@ -108,19 +108,19 @@ svint64x4_t test_svcreate4_s64(svint64_t x0, svint64_t x1, svint64_t x2, svint64
 // CHECK-LABEL: @test_svcreate4_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], i64 32)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], i64 48)
-// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z17test_svcreate4_u8u11__SVUint8_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], i64 32)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], i64 48)
-// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_t x4) ATTR {
@@ -129,19 +129,19 @@ svuint8x4_t test_svcreate4_u8(svuint8_t x0, svuint8_t x1, svuint8_t x2, svuint8_
 // CHECK-LABEL: @test_svcreate4_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_u16u12__SVUint16_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]]
 //
 svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svuint16_t x4) ATTR {
@@ -150,19 +150,19 @@ svuint16x4_t test_svcreate4_u16(svuint16_t x0, svuint16_t x1, svuint16_t x2, svu
 // CHECK-LABEL: @test_svcreate4_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_u32u12__SVUint32_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]]
 //
 svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svuint32_t x4) ATTR {
@@ -171,19 +171,19 @@ svuint32x4_t test_svcreate4_u32(svuint32_t x0, svuint32_t x1, svuint32_t x2, svu
 // CHECK-LABEL: @test_svcreate4_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_u64u12__SVUint64_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]]
 //
 svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svuint64_t x4) ATTR {
@@ -192,19 +192,19 @@ svuint64x4_t test_svcreate4_u64(svuint64_t x0, svuint64_t x1, svuint64_t x2, svu
 // CHECK-LABEL: @test_svcreate4_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP0]], <vscale x 8 x half> [[X1:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP1]], <vscale x 8 x half> [[X2:%.*]], i64 16)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[X4:%.*]], i64 24)
-// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_f16u13__SVFloat16_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP0]], <vscale x 8 x half> [[X1:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP1]], <vscale x 8 x half> [[X2:%.*]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[X4:%.*]], i64 24)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]]
 //
 svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2, svfloat16_t x4) ATTR {
@@ -213,19 +213,19 @@ svfloat16x4_t test_svcreate4_f16(svfloat16_t x0, svfloat16_t x1, svfloat16_t x2,
 // CHECK-LABEL: @test_svcreate4_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP0]], <vscale x 4 x float> [[X1:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP1]], <vscale x 4 x float> [[X2:%.*]], i64 8)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[X4:%.*]], i64 12)
-// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], <vscale x 4 x float> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_f32u13__SVFloat32_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP0]], <vscale x 4 x float> [[X1:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP1]], <vscale x 4 x float> [[X2:%.*]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[X4:%.*]], i64 12)
-// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], <vscale x 4 x float> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]]
 //
 svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2, svfloat32_t x4) ATTR {
@@ -234,19 +234,19 @@ svfloat32x4_t test_svcreate4_f32(svfloat32_t x0, svfloat32_t x1, svfloat32_t x2,
 // CHECK-LABEL: @test_svcreate4_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[X0:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP0]], <vscale x 2 x double> [[X1:%.*]], i64 2)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP1]], <vscale x 2 x double> [[X2:%.*]], i64 4)
-// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[X4:%.*]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP3]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[X0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[X1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[X2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[X4:%.*]], 3
+// CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svcreate4_f64u13__SVFloat64_tS_S_S_(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[X0:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP0]], <vscale x 2 x double> [[X1:%.*]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP1]], <vscale x 2 x double> [[X2:%.*]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[X4:%.*]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP3]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[X0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[X1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[X2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[X4:%.*]], 3
+// CPP-CHECK-NEXT:    ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]]
 //
 svfloat64x4_t test_svcreate4_f64(svfloat64_t x0, svfloat64_t x1, svfloat64_t x2, svfloat64_t x4) ATTR {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
index eb66fbf0558697..05d9ec66847944 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
@@ -24,13 +24,17 @@
 // CHECK-LABEL: @test_svget2_bf16_0(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], i64 0)
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svget2_bf16_014svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], i64 0)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
 svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR {
@@ -39,13 +43,17 @@ svbfloat16_t test_svget2_bf16_0(svbfloat16x2_t tuple) ATTR
 // CHECK-LABEL: @test_svget2_bf16_1(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], i64 8)
-// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
 // CPP-CHECK-LABEL: @_Z18test_svget2_bf16_114svbfloat16x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], i64 8)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
 svbfloat16_t test_svget2_bf16_1(svbfloat16x2_t tuple) ATTR {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
index 6f1b0e415bfecb..c9693583f2f539 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
@@ -25,13 +25,17 @@
 // CHECK-LABEL: @test_svget2_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8>
@llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z14test_svget2_s810svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svint8_t test_svget2_s8(svint8x2_t tuple) ATTR { @@ -40,13 +44,17 @@ svint8_t test_svget2_s8(svint8x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_s1611svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] // svint16_t test_svget2_s16(svint16x2_t tuple) ATTR { @@ -55,13 +63,17 @@ svint16_t test_svget2_s16(svint16x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_s3211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svint32_t test_svget2_s32(svint32x2_t tuple) ATTR { @@ -70,13 +82,17 @@ svint32_t test_svget2_s32(svint32x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: 
@_Z15test_svget2_s6411svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] // svint64_t test_svget2_s64(svint64x2_t tuple) ATTR { @@ -85,13 +101,17 @@ svint64_t test_svget2_s64(svint64x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z14test_svget2_u811svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svuint8_t test_svget2_u8(svuint8x2_t tuple) ATTR { @@ -100,13 +120,17 @@ svuint8_t test_svget2_u8(svuint8x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_u1612svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] // svuint16_t test_svget2_u16(svuint16x2_t tuple) ATTR { @@ -115,13 +139,17 @@ svuint16_t test_svget2_u16(svuint16x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_u3212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], 
[[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svuint32_t test_svget2_u32(svuint32x2_t tuple) ATTR { @@ -130,13 +158,17 @@ svuint32_t test_svget2_u32(svuint32x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_u6412svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[TUPLE:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] // svuint64_t test_svget2_u64(svuint64x2_t tuple) ATTR { @@ -145,13 +177,17 @@ svuint64_t test_svget2_u64(svuint64x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_f1613svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svfloat16_t test_svget2_f16(svfloat16x2_t tuple) ATTR { @@ -160,13 +196,17 @@ svfloat16_t test_svget2_f16(svfloat16x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[TUPLE:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_f3213svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[TUPLE:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret [[TMP2]] // svfloat32_t test_svget2_f32(svfloat32x2_t tuple) ATTR { @@ -175,13 +215,17 @@ svfloat32_t test_svget2_f32(svfloat32x2_t tuple) ATTR // CHECK-LABEL: @test_svget2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_f6413svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svfloat64_t test_svget2_f64(svfloat64x2_t tuple) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c index c89f686c6bd2ae..950c4dad9749af 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c @@ -24,13 +24,19 @@ // CHECK-LABEL: @test_svget3_bf16_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svget3_bf16_014svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] // svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR { @@ -39,13 +45,19 @@ svbfloat16_t test_svget3_bf16_0(svbfloat16x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_bf16_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svget3_bf16_114svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: ret [[TMP3]] // svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR { @@ -54,13 +66,19 @@ svbfloat16_t test_svget3_bf16_1(svbfloat16x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_bf16_2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svget3_bf16_214svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] // svbfloat16_t test_svget3_bf16_2(svbfloat16x3_t tuple) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c index a7f74d29ed2904..d85121262f28e2 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c @@ -26,13 +26,19 @@ // CHECK-LABEL: @test_svget3_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z14test_svget3_s810svint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] // svint8_t test_svget3_s8(svint8x3_t tuple) ATTR { @@ -41,13 +47,19 @@ svint8_t test_svget3_s8(svint8x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_s1611svint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] // svint16_t test_svget3_s16(svint16x3_t tuple) ATTR { @@ -56,13 +68,19 @@ svint16_t test_svget3_s16(svint16x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_s3211svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: ret [[TMP3]] // svint32_t test_svget3_s32(svint32x3_t tuple) ATTR { @@ -71,13 +89,19 @@ svint32_t test_svget3_s32(svint32x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_s6411svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] // svint64_t test_svget3_s64(svint64x3_t tuple) ATTR { @@ -86,13 +110,19 @@ svint64_t test_svget3_s64(svint64x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 32) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z14test_svget3_u811svuint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[TUPLE:%.*]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] // svuint8_t test_svget3_u8(svuint8x3_t tuple) ATTR { @@ -101,13 +131,19 @@ svuint8_t test_svget3_u8(svuint8x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_u1612svuint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: ret [[TMP3]] // svuint16_t test_svget3_u16(svuint16x3_t tuple) ATTR { @@ -116,13 +152,19 @@ svuint16_t test_svget3_u16(svuint16x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_u3212svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] // svuint32_t test_svget3_u32(svuint32x3_t tuple) ATTR { @@ -131,13 +173,19 @@ svuint32_t test_svget3_u32(svuint32x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_u64( // CHECK-NEXT: 
entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_u6412svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[TUPLE:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] // svuint64_t test_svget3_u64(svuint64x3_t tuple) ATTR { @@ -146,13 +194,19 @@ svuint64_t test_svget3_u64(svuint64x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_f1613svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: ret [[TMP3]] // svfloat16_t test_svget3_f16(svfloat16x3_t tuple) ATTR { @@ -161,13 +215,19 @@ svfloat16_t test_svget3_f16(svfloat16x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_f3213svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
, } [[TMP2]], 0 +// CPP-CHECK-NEXT: ret [[TMP3]] // svfloat32_t test_svget3_f32(svfloat32x3_t tuple) ATTR { @@ -176,13 +236,19 @@ svfloat32_t test_svget3_f32(svfloat32x3_t tuple) ATTR // CHECK-LABEL: @test_svget3_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[TUPLE:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: ret [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svget3_f6413svfloat64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[TUPLE:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: ret [[TMP3]] // svfloat64_t test_svget3_f64(svfloat64x3_t tuple) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c index 79eed6912e3ced..55b379140ca3dd 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c @@ -24,13 +24,21 @@ // CHECK-LABEL: @test_svget4_bf16_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svget4_bf16_014svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR { @@ -39,13 +47,21 @@ svbfloat16_t test_svget4_bf16_0(svbfloat16x4_t tuple) ATTR // CHECK-LABEL: @test_svget4_bf16_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], 
[[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svget4_bf16_114svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR { @@ -54,13 +70,21 @@ svbfloat16_t test_svget4_bf16_1(svbfloat16x4_t tuple) ATTR // CHECK-LABEL: @test_svget4_bf16_2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svget4_bf16_214svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR { @@ -69,13 +93,21 @@ svbfloat16_t test_svget4_bf16_2(svbfloat16x4_t tuple) ATTR // CHECK-LABEL: @test_svget4_bf16_3( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 24) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svget4_bf16_314svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[TUPLE:%.*]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } 
poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbfloat16_t test_svget4_bf16_3(svbfloat16x4_t tuple) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c index 278c217a0def41..d3eaae22a373fa 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c @@ -27,13 +27,21 @@ // several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svget4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z14test_svget4_s810svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: ret [[TMP4]] // svint8_t test_svget4_s8(svint8x4_t tuple) ATTR { @@ -42,13 +50,21 @@ svint8_t test_svget4_s8(svint8x4_t tuple) ATTR // CHECK-LABEL: @test_svget4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svget4_s1611svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: 
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svint16_t test_svget4_s16(svint16x4_t tuple) ATTR {
@@ -57,13 +73,21 @@ svint16_t test_svget4_s16(svint16x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[TUPLE:%.*]], i64 8)
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_s3211svint32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[TUPLE:%.*]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
svint32_t test_svget4_s32(svint32x4_t tuple) ATTR {
@@ -72,13 +96,21 @@ svint32_t test_svget4_s32(svint32x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[TUPLE:%.*]], i64 6)
-// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_s6411svint64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[TUPLE:%.*]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
svint64_t test_svget4_s64(svint64x4_t tuple) ATTR {
@@ -87,13 +119,21 @@ svint64_t test_svget4_s64(svint64x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[TUPLE:%.*]], i64 32)
-// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z14test_svget4_u811svuint8x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[TUPLE:%.*]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP4]]
//
svuint8_t test_svget4_u8(svuint8x4_t tuple) ATTR {
@@ -102,13 +142,21 @@ svuint8_t test_svget4_u8(svuint8x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[TUPLE:%.*]], i64 24)
-// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_u1612svuint16x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[TUPLE:%.*]], i64 24)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svget4_u16(svuint16x4_t tuple) ATTR {
@@ -117,13 +165,21 @@ svuint16_t test_svget4_u16(svuint16x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[TUPLE:%.*]], i64 0)
-// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_u3212svuint32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[TUPLE:%.*]], i64 0)
-// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
svuint32_t test_svget4_u32(svuint32x4_t tuple) ATTR {
@@ -132,13 +188,21 @@ svuint32_t test_svget4_u32(svuint32x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[TUPLE:%.*]], i64 6)
-// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_u6412svuint64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[TUPLE:%.*]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
svuint64_t test_svget4_u64(svuint64x4_t tuple) ATTR {
@@ -147,13 +211,21 @@ svuint64_t test_svget4_u64(svuint64x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[TUPLE:%.*]], i64 16)
-// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_f1613svfloat16x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[TUPLE:%.*]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 2
+// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP4]]
//
svfloat16_t test_svget4_f16(svfloat16x4_t tuple) ATTR {
@@ -162,13 +234,21 @@ svfloat16_t test_svget4_f16(svfloat16x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[TUPLE:%.*]], i64 0)
-// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], <vscale x 4 x float> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_f3213svfloat32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[TUPLE:%.*]], i64 0)
-// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], <vscale x 4 x float> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], <vscale x 4 x float> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], <vscale x 4 x float> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP3]], 0
+// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP4]]
//
svfloat32_t test_svget4_f32(svfloat32x4_t tuple) ATTR {
@@ -177,13 +257,21 @@ svfloat32_t test_svget4_f32(svfloat32x4_t tuple) ATTR
// CHECK-LABEL: @test_svget4_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[TUPLE:%.*]], i64 4)
-// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[TUPLE_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[TUPLE_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z15test_svget4_f6413svfloat64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[TUPLE:%.*]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[TUPLE_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[TUPLE_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
+// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP4]]
//
svfloat64_t test_svget4_f64(svfloat64x4_t tuple) ATTR {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
index fcae89c50b0e65..5535b3d090d323 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
@@ -26,21 +26,13 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z15test_svld2_bf16u10__SVBool_tPKu6__bf16(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP5]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
//
svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR {
@@ -53,22 +45,14 @@
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z20test_svld2_vnum_bf16u10__SVBool_tPKu6__bf16l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR {
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
index 992b51fa251233..3097cb9cbcaab4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
@@ -26,20 +26,12 @@
// CHECK-LABEL: @test_svld2_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld2_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> }
@llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) MODE_ATTR { @@ -50,21 +42,13 @@ svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) MODE_ATTR { @@ -75,21 +59,13 @@ svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint32x2_t test_svld2_s32(svbool_t 
pg, const int32_t *base) MODE_ATTR { @@ -100,21 +76,13 @@ svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) MODE_ATTR { @@ -124,20 +92,12 @@ svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) MODE_ATTR // CHECK-LABEL: @test_svld2_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z13test_svld2_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) MODE_ATTR { @@ -148,21 +108,13 @@ svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], 
i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) MODE_ATTR { @@ -173,21 +125,13 @@ svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) MODE_ATTR { @@ -198,21 +142,13 @@ svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) MODE_ATTR { @@ -223,21 +159,13 @@ svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) MODE_ATTR { @@ -248,21 +176,13 @@ svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) MODE_ATTR { @@ -273,21 +193,13 @@ svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) MODE_ATTR // CHECK-NEXT: 
entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld2_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR { @@ -298,21 +210,13 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR { @@ -324,22 +228,14 @@ svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MOD // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: 
[[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR { @@ -351,22 +247,14 @@ svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR { @@ -378,22 +266,14 @@ svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: 
@_Z19test_svld2_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR { @@ -404,21 +284,13 @@ svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svld2_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR { @@ -430,22 +302,14 @@ svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) M // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( [[TMP0]], 
ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR { @@ -457,22 +321,14 @@ svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR { @@ -484,22 +340,14 @@ svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, 
[[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR { @@ -511,22 +359,14 @@ svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR { @@ -538,22 +378,14 @@ svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( 
[[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR { @@ -565,22 +397,14 @@ svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld2_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c index 9100d27534c1ca..41a367b737b0f7 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c @@ -27,25 +27,13 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld3_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] // svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR { @@ -57,26 +45,14 @@ svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z20test_svld3_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c index 10206b5362e11e..2deb5a1d4930cf 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c @@ -25,24 +25,12 @@ // CHECK-LABEL: @test_svld3_s8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld3_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) MODE_ATTR
{
@@ -53,25 +41,13 @@ svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_s16u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
//
svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) MODE_ATTR
{
@@ -82,25 +58,13 @@ svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_s32u10__SVBool_tPKi(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
//
svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) MODE_ATTR
{
@@ -111,25 +75,13 @@ svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_s64u10__SVBool_tPKl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
//
svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) MODE_ATTR
{
@@ -139,24 +91,12 @@ svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) MODE_ATTR
// CHECK-LABEL: @test_svld3_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld3_u8u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
{
@@ -167,25 +107,13 @@ svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_u16u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
//
svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
{
@@ -196,25 +124,13 @@ svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_u32u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
//
svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
{
@@ -225,25 +141,13 @@ svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_u64u10__SVBool_tPKm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
//
svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
{
@@ -254,25 +158,13 @@ svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x half> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_f16u10__SVBool_tPKDh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x half> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
//
svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) MODE_ATTR
{
@@ -283,25 +175,13 @@ svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x float> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_f32u10__SVBool_tPKf(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x float> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
//
svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) MODE_ATTR
{
@@ -312,25 +192,13 @@ svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x double> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z14test_svld3_f64u10__SVBool_tPKd(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x double> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
//
svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR
{
@@ -341,25 +209,13 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
-//
[[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] // svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR { @@ -371,26 +227,14 @@ svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MOD // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR { @@ -402,26 +246,14 @@ svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , 
ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR { @@ -433,26 +265,14 @@ svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR { @@ -463,25 +283,13 @@ svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// 
CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svld3_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , , } [[TMP1]] // svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR { @@ -493,26 +301,14 @@ svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) M // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR { @@ -524,26 +320,14 @@ svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 
[[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR { @@ -555,26 +339,14 @@ svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } 
[[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR { @@ -586,26 +358,14 @@ svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR { @@ -617,26 +377,14 @@ svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR { @@ -648,26 +396,14 @@ svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld3_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , } [[TMP2]] // svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c index 0f21ffdb6f7093..a88e6f11a05108 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c @@ -26,29 +26,13 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 3
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 24)
-// CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z15test_svld4_bf16u10__SVBool_tPKu6__bf16(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 3
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 24)
-// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
//
svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
{
@@ -60,30 +44,14 @@ svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 3
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 24)
-// CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z20test_svld4_vnum_bf16u10__SVBool_tPKu6__bf16l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 3
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 24)
-// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
index 06e07307886534..30796a4f46a724 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
@@ -25,28 +25,12 @@
// CHECK-LABEL: @test_svld4_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z13test_svld4_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) MODE_ATTR
{
@@ -57,29 +41,13 @@ svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) MODE_ATTR
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT:
[[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) MODE_ATTR { @@ -90,29 +58,13 @@ svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) MODE_ATTR { @@ -123,29 +75,13 @@ svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) MODE_ATTR { @@ -155,28 +91,12 @@ svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) MODE_ATTR // CHECK-LABEL: @test_svld4_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z13test_svld4_u8u10__SVBool_tPKh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base) MODE_ATTR { @@ -187,29 +107,13 @@ svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u16u10__SVBool_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } 
[[TMP1]] // svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) MODE_ATTR { @@ -220,29 +124,13 @@ svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) MODE_ATTR { @@ -253,29 +141,13 @@ svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) MODE_ATTR { @@ -286,29 +158,13 @@ svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) MODE_ATTR { @@ -319,29 +175,13 @@ svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: 
[[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) MODE_ATTR { @@ -352,29 +192,13 @@ svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z14test_svld4_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR { @@ -385,29 +209,13 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svld4_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR { @@ -419,30 +227,14 @@ svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MOD // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], 
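
Note on the svld4 churn above and below: it is mechanical. The @llvm.aarch64.sve.ld4.sret.* intrinsics already return a four-element struct of scalable vectors; the deleted CHECK lines show the old lowering, which flattened that struct into one quadruple-width vector (e.g. <vscale x 16 x i32> for svuint32x4_t) through a chain of extractvalue plus @llvm.vector.insert steps, while the added lines simply return the struct, now that the tuple types are themselves structs. A minimal source-level sketch of what these checks exercise (illustrative only, not part of the patch; the function names are invented):

#include <arm_sve.h>

// With this patch the body is one call to @llvm.aarch64.sve.ld4.sret.nxv4i32
// returning { <vscale x 4 x i32>, <vscale x 4 x i32>,
//             <vscale x 4 x i32>, <vscale x 4 x i32> },
// which is returned directly instead of being re-packed into <vscale x 16 x i32>.
svuint32x4_t load4(svbool_t pg, const uint32_t *base) {
  return svld4_u32(pg, base);
}

// Reading one member of the tuple now lowers to a plain extractvalue rather
// than an @llvm.vector.extract at a scaled offset into the wide vector.
svuint32_t first(svuint32x4_t t) {
  return svget4_u32(t, 0);
}
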
i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR { @@ -454,30 +246,14 @@ svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR { @@ -489,30 +265,14 @@ svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR { @@ -523,29 +283,13 @@ svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = 
extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svld4_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( [[PG:%.*]], ptr [[TMP0]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR { @@ -557,30 +301,14 @@ svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) M // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail 
call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR { @@ -592,30 +320,14 @@ svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR { @@ -627,30 +339,14 @@ svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR { @@ -662,30 +358,14 @@ svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 24) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR { @@ -697,30 +377,14 @@ svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 12) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR { @@ -732,30 +396,14 @@ svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn // CHECK-NEXT: 
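
The _vnum variants in these hunks differ from the plain forms only in addressing: the base pointer is advanced by vnum whole vector registers (vnum * svcntb() bytes) before the four-vector load, which the IR expresses as a getelementptr whose element type is a single vector, e.g. getelementptr <vscale x 8 x i16>, ptr %base, i64 %vnum. The struct-return simplification is then identical to the non-vnum tests. The equivalent arithmetic spelled out in C (a sketch; the helper name is invented):

#include <arm_sve.h>

svuint16x4_t load4_at(svbool_t pg, const uint16_t *base, int64_t vnum) {
  // Same result as svld4_vnum_u16(pg, base, vnum): the offset unit is one
  // vector's worth of elements, svcnth() for 16-bit data, not four vectors.
  const uint16_t *p = base + vnum * (int64_t)svcnth();
  return svld4_u16(pg, p);
}
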
[[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 6) -// CHECK-NEXT: ret [[TMP10]] +// CHECK-NEXT: ret { , , , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z19test_svld4_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP0]], ptr [[TMP1]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP2]], 2 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP2]], 3 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP10]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP2]] // svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c index 41208bfb1f435c..02704229292b2f 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c @@ -59,18 +59,51 @@ // // TUPLE2-LABEL: @test_svreinterpret_s8_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// 
TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s8_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -79,18 +112,51 @@ // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , 
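
Before the reinterpret itself, each tuple hunk first rebuilds its argument: a svbfloat16x2_t parameter now arrives as two separate <vscale x 8 x bfloat> values (the OP_COERCE0/OP_COERCE1 captures) and is reassembled with an insertvalue chain into { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, where the old IR took a single doubled-width <vscale x 16 x bfloat> argument. The same coercion applies to any tuple parameter, independent of the builtin being tested. For example (a sketch; the helper is invented and assumes bf16 support is enabled):

#include <arm_sve.h>

// %op arrives as %op.coerce0 and %op.coerce1; after reassembly the body is a
// single extractvalue on { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }.
svbfloat16_t second(svbfloat16x2_t op) {
  return svget2_bf16(op, 1);
}
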
} [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s8_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_s8, _bf16)(op); @@ -103,18 +169,51 @@ TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_s16_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// 
TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_s16_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -123,18 +222,51 @@ TYPE(svint8) test_svreinterpret_s8_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: 
[[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s16_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_s16, _bf16)(op); @@ -147,18 +279,51 @@ TYPE(svint16) test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_s32_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: 
[[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_s32_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -167,18 +332,51 @@ TYPE(svint16) test_svreinterpret_s16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// 
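
The reinterpret lowering itself is now member-wise: where the old IR bitcast one concatenated vector (e.g. <vscale x 16 x bfloat> to <vscale x 32 x i8>), the new IR extracts each struct member, bitcasts it to the corresponding member of the destination tuple type, and inserts it into a poison struct, which is exactly the TMP chain these added CHECK lines capture. In source terms (a sketch, assuming the _x2 tuple overload spelling from recent ACLE):

#include <arm_sve.h>

// Per member i, the generated IR is roughly:
//   %e = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %op, i
//   %b = bitcast <vscale x 8 x bfloat> %e to <vscale x 16 x i8>
//   ...insertvalue %b into the result struct at index i...
svint8x2_t cast_pair(svbfloat16x2_t op) {
  return svreinterpret_s8_bf16_x2(op);
}
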
CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP9]] to <vscale x 4 x i32>
+// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP8]], <vscale x 4 x i32> [[TMP10]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP11]]
//
// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s32_bf1614svbfloat16x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 16 x i32>
-// CPP-TUPLE4-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP4]] to <vscale x 4 x i32>
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP5]], 0
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP7]] to <vscale x 4 x i32>
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP6]], <vscale x 4 x i32> [[TMP8]], 1
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP10]] to <vscale x 4 x i32>
+// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]], <vscale x 4 x i32> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP13]] to <vscale x 4 x i32>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP12]], <vscale x 4 x i32> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP15]]
//
TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) MODE_ATTR {
  return SVE_ACLE_FUNC(svreinterpret_s32, _bf16)(op);
@@ -190,18 +388,51 @@ TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) MODE_ATTR {
//
// TUPLE2-LABEL: @test_svreinterpret_s64_bf16(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP2]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP5]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s64_bf16(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 24 x bfloat> [[OP:%.*]] to <vscale x 6 x i64>
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP3]] to <vscale x 2 x i64>
+// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP6]] to <vscale x 2 x i64>
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP7]], 1
+// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP9]] to <vscale x 2 x i64>
+// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]], <vscale x 2 x i64> [[TMP10]], 2
+// TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_s64_bf16(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
-// TUPLE4-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP4]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP5]], 0
+// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP7]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP8]], 1
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP10]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP13]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
// CPP-CHECK-LABEL: @_Z27test_svreinterpret_s64_bf16u14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
@@ -210,18 +441,51 @@ TYPE(svint32) test_svreinterpret_s32_bf16(TYPE(svbfloat16) op) MODE_ATTR {
//
// CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 4 x i64>
-// CPP-TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP2]] to <vscale x 2 x i64>
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP5]] to <vscale x 2 x i64>
+// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP6]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]]
//
// CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 24 x bfloat> [[OP:%.*]] to <vscale x 6 x i64>
-// CPP-TUPLE3-NEXT: ret <vscale x 6 x i64> [[TMP0]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP3]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP6]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP7]], 1
+// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP9]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]], <vscale x 2 x i64> [[TMP10]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_s64_bf1614svbfloat16x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 8 x i64>
-// CPP-TUPLE4-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP4]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP5]], 0
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP7]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP8]], 1
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP10]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP13]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) MODE_ATTR {
  return SVE_ACLE_FUNC(svreinterpret_s64, _bf16)(op);
@@ -234,18 +498,51 @@ TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) MODE_ATTR {
//
// TUPLE2-LABEL: @test_svreinterpret_u8_bf16(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x bfloat> [[OP:%.*]] to <vscale x 32 x i8>
-// TUPLE2-NEXT: ret <vscale x 32 x i8> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP2]] to <vscale x 16 x i8>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP5]] to <vscale x 16 x i8>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], <vscale x 16 x i8> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_u8_bf16(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 24 x bfloat> [[OP:%.*]] to <vscale x 48 x i8>
-// TUPLE3-NEXT: ret <vscale x 48 x i8> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP3]] to <vscale x 16 x i8>
+// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP6]] to <vscale x 16 x i8>
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP7]], 1
+// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 8 x bfloat> [[TMP9]] to <vscale x 16 x i8>
+// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], <vscale x 16 x i8> [[TMP10]], 2
+// TUPLE3-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_u8_bf16(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 32 x bfloat> [[OP:%.*]] to <vscale x 64 x i8>
-// TUPLE4-NEXT: ret <vscale x 64 x i8> [[TMP0]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> 
[[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u8_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -254,18 +551,51 @@ TYPE(svint64) test_svreinterpret_s64_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u8_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_u8, _bf16)(op); @@ -278,18 +608,51 @@ TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_u16_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], 
[[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u16_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -298,18 +661,51 @@ TYPE(svuint8) test_svreinterpret_u8_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u16_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue 
{ , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_u16, _bf16)(op); @@ -322,18 +718,51 @@ TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_u32_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , 
} [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u32_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -342,18 +771,51 @@ TYPE(svuint16) test_svreinterpret_u16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u32_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = 
bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_u32, _bf16)(op); @@ -366,18 +828,51 @@ TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_u64_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to 
+// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_u64_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -386,18 +881,51 @@ TYPE(svuint32) test_svreinterpret_u32_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_u64_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = 
insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_u64, _bf16)(op); @@ -410,18 +938,51 @@ TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], 
[[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -430,18 +991,51 @@ TYPE(svuint64) test_svreinterpret_u64_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { 
, , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_s8(TYPE(svint8) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _s8)(op); @@ -454,18 +1048,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s8(TYPE(svint8) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -474,18 +1101,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s8(TYPE(svint8) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: 
@_Z27test_svreinterpret_bf16_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_s16(TYPE(svint16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _s16)(op); @@ -498,18 +1158,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s16(TYPE(svint16) op) MODE_ATTR { // // TUPLE2-LABEL: 
@test_svreinterpret_bf16_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -518,18 +1211,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s16(TYPE(svint16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: 
[[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_s32(TYPE(svint32) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _s32)(op); @@ -542,18 +1268,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s32(TYPE(svint32) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -562,18 +1321,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s32(TYPE(svint32) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// 
CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_s64(TYPE(svint64) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _s64)(op); @@ -586,18 +1378,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s64(TYPE(svint64) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } 
[[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_bf16_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -606,18 +1431,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_s64(TYPE(svint64) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: 
 // CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 48 x i8> [[OP:%.*]] to <vscale x 24 x bfloat>
-// CPP-TUPLE3-NEXT:    ret <vscale x 24 x bfloat> [[TMP0]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 16 x i8> [[TMP3]] to <vscale x 8 x bfloat>
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP4]], 0
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = bitcast <vscale x 16 x i8> [[TMP6]] to <vscale x 8 x bfloat>
+// CPP-TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]], <vscale x 8 x bfloat> [[TMP7]], 1
+// CPP-TUPLE3-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT:    [[TMP10:%.*]] = bitcast <vscale x 16 x i8> [[TMP9]] to <vscale x 8 x bfloat>
+// CPP-TUPLE3-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], <vscale x 8 x bfloat> [[TMP10]], 2
+// CPP-TUPLE3-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP11]]
 //
 // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_bf16_u811svuint8x4_t(
 // CPP-TUPLE4-NEXT:  entry:
-// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 64 x i8> [[OP:%.*]] to <vscale x 32 x bfloat>
-// CPP-TUPLE4-NEXT:    ret <vscale x 32 x bfloat> [[TMP0]]
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 16 x i8> [[TMP4]] to <vscale x 8 x bfloat>
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP5]], 0
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 16 x i8> [[TMP7]] to <vscale x 8 x bfloat>
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP6]], <vscale x 8 x bfloat> [[TMP8]], 1
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 16 x i8> [[TMP10]] to <vscale x 8 x bfloat>
+// CPP-TUPLE4-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]], <vscale x 8 x bfloat> [[TMP11]], 2
+// CPP-TUPLE4-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT:    [[TMP14:%.*]] = bitcast <vscale x 16 x i8> [[TMP13]] to <vscale x 8 x bfloat>
+// CPP-TUPLE4-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP12]], <vscale x 8 x bfloat> [[TMP14]], 3
+// CPP-TUPLE4-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP15]]
 //
 TYPE(svbfloat16) test_svreinterpret_bf16_u8(TYPE(svuint8) op) MODE_ATTR {
   return SVE_ACLE_FUNC(svreinterpret_bf16, _u8)(op);
@@ -630,18 +1488,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u8(TYPE(svuint8) op) MODE_ATTR {
 //
 // TUPLE2-LABEL: @test_svreinterpret_bf16_u16(
 // TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i16> [[OP:%.*]] to <vscale x 16 x bfloat>
-// TUPLE2-NEXT:    ret <vscale x 16 x bfloat> [[TMP0]]
+// TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// TUPLE2-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 8 x i16> [[TMP2]] to <vscale x 8 x bfloat>
+// TUPLE2-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP3]], 0
+// TUPLE2-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 8 x i16> [[TMP5]] to <vscale x 8 x bfloat>
+// TUPLE2-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], <vscale x 8 x bfloat> [[TMP6]], 1
+// TUPLE2-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP7]]
 //
 // TUPLE3-LABEL: @test_svreinterpret_bf16_u16(
 // TUPLE3-NEXT:  entry:
-// TUPLE3-NEXT:
[[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -650,18 +1541,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u8(TYPE(svuint8) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: 
[[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_u16(TYPE(svuint16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _u16)(op); @@ -674,18 +1598,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u16(TYPE(svuint16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = 
insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -694,18 +1651,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u16(TYPE(svuint16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast 
[[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_u32(TYPE(svuint32) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _u32)(op); @@ -718,18 +1708,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u32(TYPE(svuint32) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] 
= extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -738,18 +1761,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u32(TYPE(svuint32) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } 
[[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_u64(TYPE(svuint64) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _u64)(op); @@ -761,15 +1817,42 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u64(TYPE(svuint64) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , 
<vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP4]], 0
+// TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], 1
+// TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], 2
+// TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// TUPLE4-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP9]], <vscale x 8 x bfloat> [[TMP10]], 3
+// TUPLE4-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP11]]
 //
 // CPP-CHECK-LABEL: @_Z28test_svreinterpret_bf16_bf16u14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
@@ -777,15 +1860,42 @@ TYPE(svbfloat16) test_svreinterpret_bf16_u64(TYPE(svuint64) op) MODE_ATTR {
 //
 // CPP-TUPLE2-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x2_t(
 // CPP-TUPLE2-NEXT:  entry:
-// CPP-TUPLE2-NEXT:    ret <vscale x 16 x bfloat> [[OP:%.*]]
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP2]], 0
+// CPP-TUPLE2-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], 1
+// CPP-TUPLE2-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]]
 //
 // CPP-TUPLE3-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x3_t(
 // CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    ret <vscale x 24 x bfloat> [[OP:%.*]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP3]], 0
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], 2
+// CPP-TUPLE3-NEXT:    ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]]
 //
 // CPP-TUPLE4-LABEL: @_Z28test_svreinterpret_bf16_bf1614svbfloat16x4_t(
 // CPP-TUPLE4-NEXT:  entry:
-// CPP-TUPLE4-NEXT:    ret <vscale x 32 x bfloat> [[OP:%.*]]
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TMP4]], 0
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], 2
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svbfloat16) test_svreinterpret_bf16_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _bf16)(op); @@ -798,18 +1908,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -818,18 
+1961,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_f16(TYPE(svfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, 
_f16)(op); @@ -842,18 +2018,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f16(TYPE(svfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -862,18 +2071,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f16(TYPE(svfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// 
CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_f32(TYPE(svfloat32) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _f32)(op); @@ -886,18 +2128,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f32(TYPE(svfloat32) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_bf16_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret 
[[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_bf16_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_bf16_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_bf16_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -906,18 +2181,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f32(TYPE(svfloat32) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 
0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_bf16_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svbfloat16) test_svreinterpret_bf16_f64(TYPE(svfloat64) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_bf16, _f64)(op); @@ -930,18 +2238,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f64(TYPE(svfloat64) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_f32_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] 
= bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_f32_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -950,18 +2291,51 @@ TYPE(svbfloat16) test_svreinterpret_bf16_f64(TYPE(svfloat64) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// 
CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f32_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_f32, _bf16)(op); @@ -974,18 +2348,51 @@ TYPE(svfloat32) test_svreinterpret_f32_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_f16_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } 
[[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f16_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f16_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_f16_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -994,18 +2401,51 @@ TYPE(svfloat32) test_svreinterpret_f32_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast 
[[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f16_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat16) test_svreinterpret_f16_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_f16, _bf16)(op); @@ -1018,18 +2458,51 @@ TYPE(svfloat16) test_svreinterpret_f16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // TUPLE2-LABEL: @test_svreinterpret_f64_bf16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_bf16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = 
insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_bf16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z27test_svreinterpret_f64_bf16u14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: @@ -1038,18 +2511,51 @@ TYPE(svfloat16) test_svreinterpret_f16_bf16(TYPE(svbfloat16) op) MODE_ATTR { // // CPP-TUPLE2-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: 
[[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z27test_svreinterpret_f64_bf1614svbfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_bf16(TYPE(svbfloat16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_f64, _bf16)(op); diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c index e61bbf3e03d7e2..7c21c297b6a3db 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret.c @@ -57,15 +57,42 @@ // // TUPLE2-LABEL: @test_svreinterpret_s8_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_s8_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 
2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_s8_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z24test_svreinterpret_s8_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -73,15 +100,42 @@ // // CPP-TUPLE2-LABEL: @_Z24test_svreinterpret_s8_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z24test_svreinterpret_s8_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z24test_svreinterpret_s8_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: 
[[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svint8) test_svreinterpret_s8_s8(TYPE(svint8) op) MODE_ATTR { @@ -95,18 +149,51 @@ TYPE(svint8) test_svreinterpret_s8_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , 
, } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -115,18 +202,51 @@ TYPE(svint8) test_svreinterpret_s8_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] 
to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_s16(TYPE(svint16) op) MODE_ATTR { @@ -140,18 +260,51 @@ TYPE(svint8) test_svreinterpret_s8_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret 
{ , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -160,18 +313,51 @@ TYPE(svint8) test_svreinterpret_s8_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) 
test_svreinterpret_s8_s32(TYPE(svint32) op) MODE_ATTR { @@ -185,18 +371,51 @@ TYPE(svint8) test_svreinterpret_s8_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -205,18 +424,51 @@ TYPE(svint8) test_svreinterpret_s8_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to 
-// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_s64(TYPE(svint64) op) MODE_ATTR { @@ -229,15 +481,42 @@ TYPE(svint8) test_svreinterpret_s8_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = 
insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_s8_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_s8_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z24test_svreinterpret_s8_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -245,15 +524,42 @@ TYPE(svint8) test_svreinterpret_s8_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z24test_svreinterpret_s8_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z24test_svreinterpret_s8_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, 
[[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z24test_svreinterpret_s8_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svint8) test_svreinterpret_s8_u8(TYPE(svuint8) op) MODE_ATTR { @@ -267,18 +573,51 @@ TYPE(svint8) test_svreinterpret_s8_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// 
TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -287,25 +626,57 @@ TYPE(svint8) test_svreinterpret_s8_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: 
[[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_u16(TYPE(svuint16) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_s8,_u16)(op); } -// // CHECK-LABEL: @test_svreinterpret_s8_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to @@ -313,18 +684,51 @@ TYPE(svint8) test_svreinterpret_s8_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: 
[[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -333,25 +737,57 @@ TYPE(svint8) test_svreinterpret_s8_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: 
[[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 4 x i32> [[TMP4]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP5]], 0
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 4 x i32> [[TMP7]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[TMP8]], 1
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 4 x i32> [[TMP10]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[TMP11]], 2
+// CPP-TUPLE4-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT:    [[TMP14:%.*]] = bitcast <vscale x 4 x i32> [[TMP13]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[TMP14]], 3
+// CPP-TUPLE4-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
//
TYPE(svint8) test_svreinterpret_s8_u32(TYPE(svuint32) op) MODE_ATTR
{
  return SVE_ACLE_FUNC(svreinterpret_s8,_u32)(op);
}
-//
// CHECK-LABEL: @test_svreinterpret_s8_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 2 x i64> [[OP:%.*]] to <vscale x 16 x i8>
@@ -359,18 +795,51 @@ TYPE(svint8) test_svreinterpret_s8_u32(TYPE(svuint32) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s8_u64(
// TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 4 x i64> [[OP:%.*]] to <vscale x 32 x i8>
-// TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+// TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// TUPLE2-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 2 x i64> [[TMP2]] to <vscale x 16 x i8>
+// TUPLE2-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP3]], 0
+// TUPLE2-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 2 x i64> [[TMP5]] to <vscale x 16 x i8>
+// TUPLE2-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], <vscale x 16 x i8> [[TMP6]], 1
+// TUPLE2-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s8_u64(
// TUPLE3-NEXT:  entry:
-// TUPLE3-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 6 x i64> [[OP:%.*]] to <vscale x 48 x i8>
-// TUPLE3-NEXT:    ret <vscale x 48 x i8> [[TMP0]]
+// TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// TUPLE3-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 2 x i64> [[TMP3]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// TUPLE3-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// TUPLE3-NEXT:    [[TMP7:%.*]] = bitcast <vscale x 2 x i64> [[TMP6]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP7]], 1
+// TUPLE3-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// TUPLE3-NEXT:    [[TMP10:%.*]] = bitcast <vscale x 2 x i64> [[TMP9]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], <vscale x 16 x i8> [[TMP10]], 2
+// TUPLE3-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_s8_u64(
// TUPLE4-NEXT:  entry:
-// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x i64> [[OP:%.*]] to <vscale x 64 x i8>
-// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+// TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// TUPLE4-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 2 x i64> [[TMP4]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP5]], 0
+// TUPLE4-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// TUPLE4-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 2 x i64> [[TMP7]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[TMP8]], 1
+// TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// TUPLE4-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 2 x i64> [[TMP10]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[TMP11]], 2
+// TUPLE4-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// TUPLE4-NEXT:    [[TMP14:%.*]] = bitcast <vscale x 2 x i64> [[TMP13]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[TMP14]], 3
+// TUPLE4-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
//
// CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_u64u12__SVUint64_t(
// CPP-CHECK-NEXT:  entry:
@@ -379,18 +848,51 @@ TYPE(svint8) test_svreinterpret_s8_u32(TYPE(svuint32) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_u6412svuint64x2_t(
// CPP-TUPLE2-NEXT:  entry:
-// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 4 x i64> [[OP:%.*]] to <vscale x 32 x i8>
-// CPP-TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 2 x i64> [[TMP2]] to <vscale x 16 x i8>
+// CPP-TUPLE2-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP3]], 0
+// CPP-TUPLE2-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 2 x i64> [[TMP5]] to <vscale x 16 x i8>
+// CPP-TUPLE2-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], <vscale x 16 x i8> [[TMP6]], 1
+// CPP-TUPLE2-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]]
//
// CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_u6412svuint64x3_t(
// CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 6 x i64> [[OP:%.*]] to <vscale x 48 x i8>
-// CPP-TUPLE3-NEXT:    ret <vscale x 48 x i8> [[TMP0]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 2 x i64> [[TMP3]] to <vscale x 16 x i8>
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = bitcast <vscale x 2 x i64> [[TMP6]] to <vscale x 16 x i8>
+// CPP-TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP7]], 1
+// CPP-TUPLE3-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT:    [[TMP10:%.*]] = bitcast <vscale x 2 x i64> [[TMP9]] to <vscale x 16 x i8>
+// CPP-TUPLE3-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], <vscale x 16 x i8> [[TMP10]], 2
+// CPP-TUPLE3-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
//
// CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_u6412svuint64x4_t(
// CPP-TUPLE4-NEXT:  entry:
-// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x i64> [[OP:%.*]] to <vscale x 64 x i8>
-// CPP-TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 2 x i64> [[TMP4]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP5]], 0
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 2 x i64> [[TMP7]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[TMP8]], 1
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP11:%.*]] = bitcast <vscale x 2 x i64> [[TMP10]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP12:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[TMP11]], 2
+// CPP-TUPLE4-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT:    [[TMP14:%.*]] = bitcast <vscale x 2 x i64> [[TMP13]] to <vscale x 16 x i8>
+// CPP-TUPLE4-NEXT:    [[TMP15:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP12]], <vscale x 16 x i8> [[TMP14]], 3
+// CPP-TUPLE4-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP15]]
//
TYPE(svint8) test_svreinterpret_s8_u64(TYPE(svuint64) op) MODE_ATTR
{
@@ -404,18 +906,51 @@ TYPE(svint8) test_svreinterpret_s8_u64(TYPE(svuint64) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s8_f16(
// TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x half> [[OP:%.*]] to <vscale x 32 x i8>
-// TUPLE2-NEXT:    ret <vscale x 32 x i8> [[TMP0]]
+// TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// TUPLE2-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 8 x half> [[TMP2]] to <vscale x 16 x i8>
+// TUPLE2-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP3]], 0
+// TUPLE2-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 8 x half> [[TMP5]] to <vscale x 16 x i8>
+// TUPLE2-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], <vscale x 16 x i8> [[TMP6]], 1
+// TUPLE2-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s8_f16(
// TUPLE3-NEXT:  entry:
-// TUPLE3-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 24 x half> [[OP:%.*]] to <vscale x 48 x i8>
-// TUPLE3-NEXT:    ret <vscale x 48 x i8> [[TMP0]]
+// TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 0
+// TUPLE3-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 8 x half> [[TMP3]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// TUPLE3-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 1
+// TUPLE3-NEXT:    [[TMP7:%.*]] = bitcast <vscale x 8 x half> [[TMP6]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP7]], 1
+// TUPLE3-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], 2
+// TUPLE3-NEXT:    [[TMP10:%.*]] = bitcast <vscale x 8 x half> [[TMP9]] to <vscale x 16 x i8>
+// TUPLE3-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]], <vscale x 16 x i8> [[TMP10]], 2
+// TUPLE3-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_s8_f16(
// TUPLE4-NEXT:  entry:
-// TUPLE4-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x half> [[OP:%.*]] to <vscale x 64 x i8>
-// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[TMP0]]
+// TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], <vscale x 8 x half> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP2]], <vscale x 8 x half> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 0
+// TUPLE4-NEXT:    [[TMP5:%.*]] = bitcast <vscale x 8 x half> [[TMP4]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP5]], 0
+// TUPLE4-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP3]], 1
+// TUPLE4-NEXT:    [[TMP8:%.*]] = bitcast <vscale x 8 x half> [[TMP7]] to <vscale x 16 x i8>
+// TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]],
[[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -424,18 +959,51 @@ TYPE(svint8) test_svreinterpret_s8_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { 
, , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -449,18 +1017,51 @@ TYPE(svint8) test_svreinterpret_s8_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: 
[[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -469,18 +1070,51 @@ TYPE(svint8) test_svreinterpret_s8_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s8_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } 
[[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -494,18 +1128,51 @@ TYPE(svint8) test_svreinterpret_s8_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s8_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s8_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s8_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s8_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -514,18 +1181,51 @@ TYPE(svint8) test_svreinterpret_s8_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: 
@_Z25test_svreinterpret_s8_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s8_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s8_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint8) test_svreinterpret_s8_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -539,18 +1239,51 @@ TYPE(svint8) test_svreinterpret_s8_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_s8( // TUPLE2-NEXT: entry: -// 
TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -559,18 +1292,51 @@ TYPE(svint8) test_svreinterpret_s8_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s16_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// 
CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s16_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s16_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_s8(TYPE(svint8) op) MODE_ATTR { @@ -583,15 +1349,42 @@ TYPE(svint16) test_svreinterpret_s16_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = 
extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// TUPLE2-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_s16_s16(
// TUPLE3-NEXT:  entry:
-// TUPLE3-NEXT:    ret <vscale x 24 x i16> [[OP:%.*]]
+// TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// TUPLE3-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// TUPLE3-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// TUPLE3-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// TUPLE3-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// TUPLE3-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_s16_s16(
// TUPLE4-NEXT:  entry:
-// TUPLE4-NEXT:    ret <vscale x 32 x i16> [[OP:%.*]]
+// TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
+// TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
+// TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP6]], 1
+// TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]], <vscale x 8 x i16> [[TMP8]], 2
+// TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// TUPLE4-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP10]], 3
+// TUPLE4-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_s16u11__SVInt16_t(
// CPP-CHECK-NEXT:  entry:
@@ -599,15 +1392,42 @@ TYPE(svint16) test_svreinterpret_s16_s8(TYPE(svint8) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_s1611svint16x2_t(
// CPP-TUPLE2-NEXT:  entry:
-// CPP-TUPLE2-NEXT:    ret <vscale x 16 x i16> [[OP:%.*]]
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP2]], 0
+// CPP-TUPLE2-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// CPP-TUPLE2-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_s1611svint16x3_t(
// CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    ret <vscale x 24 x i16> [[OP:%.*]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = extractvalue
{ , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svint16) test_svreinterpret_s16_s16(TYPE(svint16) op) MODE_ATTR { @@ -621,18 +1441,51 @@ TYPE(svint16) test_svreinterpret_s16_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = 
insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -641,18 +1494,51 @@ TYPE(svint16) test_svreinterpret_s16_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = 
insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_s32(TYPE(svint32) op) MODE_ATTR { @@ -666,18 +1552,51 @@ TYPE(svint16) test_svreinterpret_s16_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -686,18 +1605,51 @@ TYPE(svint16) test_svreinterpret_s16_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: 
[[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_s64(TYPE(svint64) op) MODE_ATTR { @@ -711,18 +1663,51 @@ TYPE(svint16) test_svreinterpret_s16_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , 
} [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s16_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -731,18 +1716,51 @@ TYPE(svint16) test_svreinterpret_s16_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s16_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s16_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s16_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: 
[[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_u8(TYPE(svuint8) op) MODE_ATTR { @@ -755,15 +1773,42 @@ TYPE(svint16) test_svreinterpret_s16_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_s16_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_s16_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -771,15 +1816,42 @@ TYPE(svint16) test_svreinterpret_s16_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CPP-TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// CPP-TUPLE2-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_u1612svuint16x3_t(
// CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    ret <vscale x 24 x i16> [[OP:%.*]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// CPP-TUPLE3-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_u1612svuint16x4_t(
// CPP-TUPLE4-NEXT:  entry:
-// CPP-TUPLE4-NEXT:    ret <vscale x 32 x i16> [[OP:%.*]]
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP6]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]], <vscale x 8 x i16> [[TMP8]], 2
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP10]], 3
+// CPP-TUPLE4-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
TYPE(svint16) test_svreinterpret_s16_u16(TYPE(svuint16) op) MODE_ATTR
{
@@ -793,18 +1865,51 @@ TYPE(svint16) test_svreinterpret_s16_u16(TYPE(svuint16) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s16_u32(
// TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 8 x i32> [[OP:%.*]] to <vscale x 16 x i16>
-// TUPLE2-NEXT:    ret <vscale x 16 x i16> [[TMP0]]
+// TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// TUPLE2-NEXT:    [[TMP3:%.*]] = bitcast <vscale x 4 x i32> [[TMP2]] to <vscale x 8 x i16>
+// TUPLE2-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// TUPLE2-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP6:%.*]] = bitcast <vscale x 4 x i32> [[TMP5]] to <vscale x 8 x i16>
+// TUPLE2-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP6]], 1
+// TUPLE2-NEXT:    ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s16_u32(
// TUPLE3-NEXT:  entry:
-// TUPLE3-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 12 x i32> [[OP:%.*]] to <vscale x 24 x i16>
-// TUPLE3-NEXT:    ret <vscale x 24 x i16> [[TMP0]]
+// TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// TUPLE3-NEXT:    [[TMP4:%.*]] = bitcast <vscale x 4 x i32> [[TMP3]] to <vscale x 8 x i16>
+//
TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -813,18 +1918,51 @@ TYPE(svint16) test_svreinterpret_s16_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: 
[[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_u32(TYPE(svuint32) op) MODE_ATTR { @@ -838,18 +1976,51 @@ TYPE(svint16) test_svreinterpret_s16_u32(TYPE(svuint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: 
[[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -858,18 +2029,51 @@ TYPE(svint16) test_svreinterpret_s16_u32(TYPE(svuint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { 
, , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_u64(TYPE(svuint64) op) MODE_ATTR { @@ -883,18 +2087,51 @@ TYPE(svint16) test_svreinterpret_s16_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = 
insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -903,18 +2140,51 @@ TYPE(svint16) test_svreinterpret_s16_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = 
insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -928,18 +2198,51 @@ TYPE(svint16) test_svreinterpret_s16_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: 
[[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -948,18 +2251,51 @@ TYPE(svint16) test_svreinterpret_s16_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: 
[[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -973,18 +2309,51 @@ TYPE(svint16) test_svreinterpret_s16_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s16_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s16_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s16_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = 
insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s16_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -993,18 +2362,51 @@ TYPE(svint16) test_svreinterpret_s16_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s16_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s16_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s16_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// 
CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint16) test_svreinterpret_s16_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -1018,18 +2420,51 @@ TYPE(svint16) test_svreinterpret_s16_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: 
[[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s32_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -1038,18 +2473,51 @@ TYPE(svint16) test_svreinterpret_s16_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s32_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s32_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s32_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// 
CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_s8(TYPE(svint8) op) MODE_ATTR { @@ -1063,18 +2531,51 @@ TYPE(svint32) test_svreinterpret_s32_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -1083,18 +2584,51 @@ TYPE(svint32) test_svreinterpret_s32_s8(TYPE(svint8) op) MODE_ATTR // // 
CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_s16(TYPE(svint16) op) MODE_ATTR { @@ -1107,15 +2641,42 @@ TYPE(svint32) test_svreinterpret_s32_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_s32( // TUPLE2-NEXT: 
-// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[OP:%.*]]
+// TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// TUPLE2-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP2]], 0
+// TUPLE2-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], <vscale x 4 x i32> [[TMP4]], 1
+// TUPLE2-NEXT:    ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
 //
[... TUPLE3/TUPLE4 blocks: the same identity rewrite with 3- and 4-element structs ...]
 //
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_s32u11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
@@ -1123,15 +2684,42 @@ TYPE(svint32) test_svreinterpret_s32_s16(TYPE(svint16) op) MODE_ATTR
 //
[... CPP-TUPLE2/3/4 blocks (@_Z26test_svreinterpret_s32_s3211svint32x2_t etc.): identical to the C checks ...]
 //
 TYPE(svint32) test_svreinterpret_s32_s32(TYPE(svint32) op) MODE_ATTR
 {
@@ -1145,18 +2733,51 @@ TYPE(svint32) test_svreinterpret_s32_s32(TYPE(svint32) op) MODE_ATTR
 //
 // TUPLE2-LABEL: @test_svreinterpret_s32_s64(
 // TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 4 x i64> [[OP:%.*]] to <vscale x 8 x i32>
-// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
[... new TUPLE2 checks: rebuild { <vscale x 2 x i64>, <vscale x 2 x i64> } from [[OP_COERCE0/1]], bitcast each element to <vscale x 4 x i32>, return the two-element struct ...]
 //
[... TUPLE3/TUPLE4 blocks: same pattern over 3 and 4 elements ...]
 //
 // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_s64u11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
@@ -1165,18 +2786,51 @@ TYPE(svint32) test_svreinterpret_s32_s32(TYPE(svint32) op) MODE_ATTR
 //
[... CPP-TUPLE2/3/4 blocks (@_Z26test_svreinterpret_s32_s6411svint64x2_t etc.): identical to the C checks ...]
 //
 TYPE(svint32) test_svreinterpret_s32_s64(TYPE(svint64) op) MODE_ATTR
 {
@@ -1190,18 +2844,51 @@ TYPE(svint32) test_svreinterpret_s32_s64(TYPE(svint64) op) MODE_ATTR
 //
 // TUPLE2-LABEL: @test_svreinterpret_s32_u8(
 // TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 32 x i8> [[OP:%.*]] to <vscale x 8 x i32>
-// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
[... new TUPLE2 checks: rebuild { <vscale x 16 x i8>, <vscale x 16 x i8> } from [[OP_COERCE0/1]], bitcast each element to <vscale x 4 x i32>, return the two-element struct ...]
 //
[... TUPLE3/TUPLE4 blocks: same pattern over 3 and 4 elements ...]
 //
 // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s32_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
@@ -1210,18 +2897,51 @@ TYPE(svint32) test_svreinterpret_s32_s64(TYPE(svint64) op) MODE_ATTR
 //
[... CPP-TUPLE2/3/4 blocks (@_Z25test_svreinterpret_s32_u811svuint8x2_t etc.): identical to the C checks ...]
 //
 TYPE(svint32) test_svreinterpret_s32_u8(TYPE(svuint8) op) MODE_ATTR
 {
@@ -1235,18 +2955,51 @@ TYPE(svint32) test_svreinterpret_s32_u8(TYPE(svuint8) op) MODE_ATTR
 //
 // TUPLE2-LABEL: @test_svreinterpret_s32_u16(
 // TUPLE2-NEXT:  entry:
-// TUPLE2-NEXT:    [[TMP0:%.*]] = bitcast <vscale x 16 x i16> [[OP:%.*]] to <vscale x 8 x i32>
-// TUPLE2-NEXT:    ret <vscale x 8 x i32> [[TMP0]]
[... new TUPLE2 and TUPLE3 checks: same pattern with <vscale x 8 x i16> elements bitcast to <vscale x 4 x i32>; the excerpt breaks off partway through the new TUPLE4 checks ...]
poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -1255,18 +3008,51 @@ TYPE(svint32) test_svreinterpret_s32_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , 
, } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_u16(TYPE(svuint16) op) MODE_ATTR { @@ -1279,15 +3065,42 @@ TYPE(svint32) test_svreinterpret_s32_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_s32_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_s32_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -1295,15 +3108,42 @@ TYPE(svint32) test_svreinterpret_s32_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] 
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], <vscale x 4 x i32> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_u3212svuint32x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 12 x i32> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], <vscale x 4 x i32> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP6]], <vscale x 4 x i32> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_u3212svuint32x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 16 x i32> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]], <vscale x 4 x i32> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP7]], <vscale x 4 x i32> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]], <vscale x 4 x i32> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP11]]
//
TYPE(svint32) test_svreinterpret_s32_u32(TYPE(svuint32) op) MODE_ATTR
{
@@ -1317,18 +3157,51 @@ TYPE(svint32) test_svreinterpret_s32_u32(TYPE(svuint32) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s32_u64(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i64> [[OP:%.*]] to <vscale x 8 x i32>
-// TUPLE2-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 2 x i64> [[TMP2]] to <vscale x 4 x i32>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 2 x i64> [[TMP5]] to <vscale x 4 x i32>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], <vscale x 4 x i32> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s32_u64(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 6 x i64> [[OP:%.*]] to <vscale x 12 x i32>
-// TUPLE3-NEXT: ret <vscale x 12 x i32> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 2 x i64> [[TMP3]] to <vscale x 4 x i32>
+// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP4]], 0
+// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 2 x i64> [[TMP6]] to <vscale x 4 x i32>
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP5]], <vscale x 4 x i32> [[TMP7]], 1
+// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 2 x i64> [[TMP9]] to <vscale x 4 x i32>
+// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP8]], <vscale x 4 x i32> [[TMP10]], 2
+// TUPLE3-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_s32_u64(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 8 x i64> [[OP:%.*]] to <vscale x 16 x i32>
-// TUPLE4-NEXT: ret <vscale x 16 x i32> [[TMP0]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 2 x i64> [[TMP4]] to <vscale x 4 x i32>
+// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP5]], 0
+// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 2 x i64> [[TMP7]] to <vscale x 4 x i32>
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP6]], <vscale x 4 x i32> [[TMP8]], 1
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 2 x i64> [[TMP10]] to <vscale x 4 x i32>
+// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP9]], <vscale x 4 x i32> [[TMP11]], 2
+// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 2 x i64> [[TMP13]] to <vscale x 4 x i32>
+// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP12]], <vscale x 4 x i32> [[TMP14]], 3
+// TUPLE4-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP15]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_u64u12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
@@ -1337,18 +3210,51 @@ TYPE(svint32) test_svreinterpret_s32_u32(TYPE(svuint32) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_u6412svuint64x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i64> [[OP:%.*]] to <vscale x 8 x i32>
-// CPP-TUPLE2-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 2 x i64> [[TMP2]] to <vscale x 4 x i32>
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TMP3]], 0
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 2 x i64> [[TMP5]] to <vscale x 4 x i32>
+// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], <vscale x 4 x i32> [[TMP6]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP7]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_u6412svuint64x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 6 x i64> [[OP:%.*]] to <vscale x 12 x i32>
-// CPP-TUPLE3-NEXT: ret <vscale x 12 x i32> [[TMP0]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+//
CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_u64(TYPE(svuint64) op) MODE_ATTR { @@ -1362,18 +3268,51 @@ TYPE(svint32) test_svreinterpret_s32_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: 
[[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -1382,18 +3321,51 @@ TYPE(svint32) test_svreinterpret_s32_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , 
, } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -1407,18 +3379,51 @@ TYPE(svint32) test_svreinterpret_s32_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = 
extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -1427,25 +3432,57 @@ TYPE(svint32) test_svreinterpret_s32_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , 
, } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_f32(TYPE(svfloat32) op) MODE_ATTR { return SVE_ACLE_FUNC(svreinterpret_s32,_f32)(op); } -// // CHECK-LABEL: @test_svreinterpret_s32_f64( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to @@ -1453,18 +3490,51 @@ TYPE(svint32) test_svreinterpret_s32_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s32_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s32_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 
+// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s32_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s32_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -1473,18 +3543,51 @@ TYPE(svint32) test_svreinterpret_s32_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s32_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s32_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s32_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: 
entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint32) test_svreinterpret_s32_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -1498,18 +3601,51 @@ TYPE(svint32) test_svreinterpret_s32_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s64_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -1518,18 +3654,51 @@ TYPE(svint32) test_svreinterpret_s32_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s64_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s64_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s64_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], 
[[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_s8(TYPE(svint8) op) MODE_ATTR { @@ -1543,18 +3712,51 @@ TYPE(svint64) test_svreinterpret_s64_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = 
insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -1563,18 +3765,51 @@ TYPE(svint64) test_svreinterpret_s64_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = 
extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_s16(TYPE(svint16) op) MODE_ATTR { @@ -1588,18 +3823,51 @@ TYPE(svint64) test_svreinterpret_s64_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast 
[[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -1608,18 +3876,51 @@ TYPE(svint64) test_svreinterpret_s64_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = 
insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 4 x i32> [[TMP13]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
TYPE(svint64) test_svreinterpret_s64_s32(TYPE(svint32) op) MODE_ATTR
{
@@ -1632,15 +3933,42 @@ TYPE(svint64) test_svreinterpret_s64_s32(TYPE(svint32) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s64_s64(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_s64_s64(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_s64_s64(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_s64u11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
@@ -1648,15 +3976,42 @@ TYPE(svint64) test_svreinterpret_s64_s32(TYPE(svint32) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_s6411svint64x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_s6411svint64x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_s6411svint64x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
TYPE(svint64) test_svreinterpret_s64_s64(TYPE(svint64) op) MODE_ATTR
{
@@ -1670,18 +4025,51 @@ TYPE(svint64) test_svreinterpret_s64_s64(TYPE(svint64) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s64_u8(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 32 x i8> [[OP:%.*]] to <vscale x 4 x i64>
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 16 x i8> [[TMP2]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 16 x i8> [[TMP5]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_s64_u8(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 48 x i8> [[OP:%.*]] to <vscale x 6 x i64>
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
[[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_s64_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -1690,18 +4078,51 @@ TYPE(svint64) test_svreinterpret_s64_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_s64_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_s64_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] 
= extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_s64_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_u8(TYPE(svuint8) op) MODE_ATTR { @@ -1715,18 +4136,51 @@ TYPE(svint64) test_svreinterpret_s64_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = 
extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -1735,18 +4189,51 @@ TYPE(svint64) test_svreinterpret_s64_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } 
[[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_u16(TYPE(svuint16) op) MODE_ATTR { @@ -1760,18 +4247,51 @@ TYPE(svint64) test_svreinterpret_s64_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: 
ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -1780,18 +4300,51 @@ TYPE(svint64) test_svreinterpret_s64_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 4 x i32> [[TMP4]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP5]], 0
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 4 x i32> [[TMP7]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP8]], 1
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 4 x i32> [[TMP10]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 4 x i32> [[TMP13]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
TYPE(svint64) test_svreinterpret_s64_u32(TYPE(svuint32) op) MODE_ATTR
{
@@ -1804,15 +4357,42 @@ TYPE(svint64) test_svreinterpret_s64_u32(TYPE(svuint32) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s64_u64(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_s64_u64(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_s64_u64(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_u64u12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
@@ -1820,15 +4400,42 @@ TYPE(svint64) test_svreinterpret_s64_u32(TYPE(svuint32) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_u6412svuint64x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_u6412svuint64x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_u6412svuint64x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
TYPE(svint64) test_svreinterpret_s64_u64(TYPE(svuint64) op) MODE_ATTR
{
@@ -1842,18 +4449,51 @@ TYPE(svint64) test_svreinterpret_s64_u64(TYPE(svuint64) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_s64_f16(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x half> [[OP:%.*]] to <vscale x 4 x i64>
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT:
[[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -1862,18 +4502,51 @@ TYPE(svint64) test_svreinterpret_s64_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 
+// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -1887,18 +4560,51 @@ TYPE(svint64) test_svreinterpret_s64_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } 
[[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -1907,18 +4613,51 @@ TYPE(svint64) test_svreinterpret_s64_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to 
-// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -1932,18 +4671,51 @@ TYPE(svint64) test_svreinterpret_s64_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_s64_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_s64_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = 
insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_s64_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_s64_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -1952,18 +4724,51 @@ TYPE(svint64) test_svreinterpret_s64_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_s64_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_s64_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] 
= extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_s64_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svint64) test_svreinterpret_s64_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -1976,15 +4781,42 @@ TYPE(svint64) test_svreinterpret_s64_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u8_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_u8_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 
 //
 // TUPLE4-LABEL: @test_svreinterpret_u8_s8(
 // TUPLE4-NEXT:  entry:
-// TUPLE4-NEXT:    ret <vscale x 64 x i8> [[OP:%.*]]
+// TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP6]], 1
+// TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]], <vscale x 16 x i8> [[TMP8]], 2
+// TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// TUPLE4-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[TMP10]], 3
+// TUPLE4-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
 //
 // CPP-CHECK-LABEL: @_Z24test_svreinterpret_u8_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
@@ -1992,15 +4824,42 @@ TYPE(svint64) test_svreinterpret_s64_f64(TYPE(svfloat64) op) MODE_ATTR
 //
 // CPP-TUPLE2-LABEL: @_Z24test_svreinterpret_u8_s810svint8x2_t(
 // CPP-TUPLE2-NEXT:  entry:
-// CPP-TUPLE2-NEXT:    ret <vscale x 32 x i8> [[OP:%.*]]
+// CPP-TUPLE2-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP2]], 0
+// CPP-TUPLE2-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], <vscale x 16 x i8> [[TMP4]], 1
+// CPP-TUPLE2-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]]
 //
 // CPP-TUPLE3-LABEL: @_Z24test_svreinterpret_u8_s810svint8x3_t(
 // CPP-TUPLE3-NEXT:  entry:
-// CPP-TUPLE3-NEXT:    ret <vscale x 48 x i8> [[OP:%.*]]
+// CPP-TUPLE3-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT:    [[TMP4:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP3]], 0
+// CPP-TUPLE3-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT:    [[TMP6:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP4]], <vscale x 16 x i8> [[TMP5]], 1
+// CPP-TUPLE3-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT:    [[TMP8:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP6]], <vscale x 16 x i8> [[TMP7]], 2
+// CPP-TUPLE3-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP8]]
 //
 // CPP-TUPLE4-LABEL: @_Z24test_svreinterpret_u8_s810svint8x4_t(
 // CPP-TUPLE4-NEXT:  entry:
-// CPP-TUPLE4-NEXT:    ret <vscale x 64 x i8> [[OP:%.*]]
+// CPP-TUPLE4-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TMP4]], 0
+// CPP-TUPLE4-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP5]], <vscale x 16 x i8> [[TMP6]], 1
+// CPP-TUPLE4-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP7]], <vscale x 16 x i8> [[TMP8]], 2
+// CPP-TUPLE4-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP9]], <vscale x 16 x i8> [[TMP10]], 3
+// CPP-TUPLE4-NEXT:    ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP11]]
 //
 TYPE(svuint8) test_svreinterpret_u8_s8(TYPE(svint8) op) MODE_ATTR
 {

[Hunks @@ -2014,18 +4873,51 @@ through @@ -2411,18 +5852,51 @@ rewrite the checks for test_svreinterpret_u8_s16, _u8_s32, _u8_s64, _u8_u8, _u8_u16, _u8_u32, _u8_u64, _u8_f16, _u8_f32 and the TUPLE2/TUPLE3/TUPLE4 variants of _u8_f64 in exactly the pattern shown above: each flattened "[[TMP0:%.*]] = bitcast <vscale x K x Ty> [[OP:%.*]] to <vscale x N x i8>" and "ret" pair becomes a per-element extractvalue/bitcast/insertvalue chain over the corresponding { <vscale x M x Ty>, ... } struct, while the same-element-type _u8_u8 case, like _u8_s8 above, repacks the struct without any bitcasts. The source element types are <vscale x 8 x i16>, <vscale x 4 x i32>, <vscale x 2 x i64>, <vscale x 16 x i8>, <vscale x 8 x half>, <vscale x 4 x float> and <vscale x 2 x double> respectively.]

, } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u8_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u8_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u8_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -2431,18 +5905,51 @@ TYPE(svuint8) test_svreinterpret_u8_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u8_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] 
= bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u8_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u8_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint8) test_svreinterpret_u8_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -2456,18 +5963,51 @@ TYPE(svuint8) test_svreinterpret_u8_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { 
, } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -2476,18 +6016,51 @@ TYPE(svuint8) test_svreinterpret_u8_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u16_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u16_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: 
[[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 16 x i8> [[TMP3]] to <vscale x 8 x i16>
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 16 x i8> [[TMP6]] to <vscale x 8 x i16>
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP7]], 1
+// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 16 x i8> [[TMP9]] to <vscale x 8 x i16>
+// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]], <vscale x 8 x i16> [[TMP10]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
// CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u16_s810svint8x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 64 x i8> [[OP:%.*]] to <vscale x 32 x i16>
-// CPP-TUPLE4-NEXT: ret <vscale x 32 x i16> [[TMP0]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 16 x i8> [[TMP4]] to <vscale x 8 x i16>
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP5]], 0
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 16 x i8> [[TMP7]] to <vscale x 8 x i16>
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP8]], 1
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 16 x i8> [[TMP10]] to <vscale x 8 x i16>
+// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 16 x i8> [[TMP13]] to <vscale x 8 x i16>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP12]], <vscale x 8 x i16> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP15]]
//
TYPE(svuint16) test_svreinterpret_u16_s8(TYPE(svint8) op) MODE_ATTR
{
@@ -2500,15 +6073,42 @@ TYPE(svuint16) test_svreinterpret_u16_s8(TYPE(svint8) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_u16_s16(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: ret <vscale x 16 x i16> [[OP:%.*]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP2]], 0
+// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// TUPLE2-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_u16_s16(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: ret <vscale x 24 x i16> [[OP:%.*]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_u16_s16(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: ret <vscale x 32 x i16> [[OP:%.*]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP6]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]], <vscale x 8 x i16> [[TMP8]], 2
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP10]], 3
+// TUPLE4-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_s16u11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
@@ -2516,15 +6116,42 @@ TYPE(svuint16) test_svreinterpret_u16_s8(TYPE(svint8) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_s1611svint16x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: ret <vscale x 16 x i16> [[OP:%.*]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_s1611svint16x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 24 x i16> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_s1611svint16x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 32 x i16> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svuint16) test_svreinterpret_u16_s16(TYPE(svint16) op) MODE_ATTR { @@ -2538,18 +6165,51 @@ TYPE(svuint16) test_svreinterpret_u16_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = 
bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -2558,18 +6218,51 @@ TYPE(svuint16) test_svreinterpret_u16_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] 
= insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_s32(TYPE(svint32) op) MODE_ATTR { @@ -2583,18 +6276,51 @@ TYPE(svuint16) test_svreinterpret_u16_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // 
CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -2603,18 +6329,51 @@ TYPE(svuint16) test_svreinterpret_u16_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) 
test_svreinterpret_u16_s64(TYPE(svint64) op) MODE_ATTR { @@ -2628,18 +6387,51 @@ TYPE(svuint16) test_svreinterpret_u16_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u16_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -2648,18 +6440,51 @@ TYPE(svuint16) test_svreinterpret_u16_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u16_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast 
[[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u16_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u16_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_u8(TYPE(svuint8) op) MODE_ATTR { @@ -2672,15 +6497,42 @@ TYPE(svuint16) test_svreinterpret_u16_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: 
[[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP2]], 0
+// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// TUPLE2-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_u16_u16(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: ret <vscale x 24 x i16> [[OP:%.*]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_u16_u16(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: ret <vscale x 32 x i16> [[OP:%.*]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP6]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]], <vscale x 8 x i16> [[TMP8]], 2
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP10]], 3
+// TUPLE4-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_u16u12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
@@ -2688,15 +6540,42 @@ TYPE(svuint16) test_svreinterpret_u16_u8(TYPE(svuint8) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_u1612svuint16x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: ret <vscale x 16 x i16> [[OP:%.*]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], <vscale x 8 x i16> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_u1612svuint16x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 24 x i16> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP6]], <vscale x 8 x i16> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_u1612svuint16x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 32 x i16> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], <vscale x 8 x i16> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]], <vscale x 8 x i16> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP9]], <vscale x 8 x i16> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
TYPE(svuint16) test_svreinterpret_u16_u16(TYPE(svuint16) op) MODE_ATTR
{
@@ -2710,18 +6589,51 @@ TYPE(svuint16) test_svreinterpret_u16_u16(TYPE(svuint16) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_u16_u32(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 8 x i32> [[OP:%.*]] to <vscale x 16 x i16>
-// TUPLE2-NEXT: ret <vscale x 16 x i16> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 4 x i32> [[TMP2]] to <vscale x 8 x i16>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 4 x i32> [[TMP5]] to <vscale x 8 x i16>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], <vscale x 8 x i16> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_u16_u32(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 12 x i32> [[OP:%.*]] to <vscale x 24 x i16>
-// TUPLE3-NEXT: ret <vscale x 24 x i16> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 4 x i32> [[TMP3]] to <vscale x 8 x i16>
+// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TMP4]], 0
+// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 4 x i32> [[TMP6]] to <vscale x 8 x i16>
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP5]], <vscale x 8 x i16> [[TMP7]], 1
+// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 4 x i32> [[TMP9]] to <vscale x 8 x i16>
+// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP8]], <vscale x 8 x i16> [[TMP10]], 2
+// TUPLE3-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP11]]
//
// TUPLE4-LABEL: @test_svreinterpret_u16_u32(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast
[[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -2730,18 +6642,51 @@ TYPE(svuint16) test_svreinterpret_u16_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , 
} poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_u32(TYPE(svuint32) op) MODE_ATTR { @@ -2755,18 +6700,51 @@ TYPE(svuint16) test_svreinterpret_u16_u32(TYPE(svuint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = 
insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -2775,18 +6753,51 @@ TYPE(svuint16) test_svreinterpret_u16_u32(TYPE(svuint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: 
[[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_u64(TYPE(svuint64) op) MODE_ATTR { @@ -2800,18 +6811,51 @@ TYPE(svuint16) test_svreinterpret_u16_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: 
[[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -2820,18 +6864,51 @@ TYPE(svuint16) test_svreinterpret_u16_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: 
[[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -2845,18 +6922,51 @@ TYPE(svuint16) test_svreinterpret_u16_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: 
[[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -2865,18 +6975,51 @@ TYPE(svuint16) test_svreinterpret_u16_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } 
[[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -2890,18 +7033,51 @@ TYPE(svuint16) test_svreinterpret_u16_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u16_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u16_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u16_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u16_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -2910,18 
+7086,51 @@ TYPE(svuint16) test_svreinterpret_u16_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u16_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u16_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u16_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint16) test_svreinterpret_u16_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -2935,18 +7144,51 @@ TYPE(svuint16) 
test_svreinterpret_u16_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u32_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -2955,18 +7197,51 @@ TYPE(svuint16) test_svreinterpret_u16_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u32_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { 
, } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u32_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u32_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_s8(TYPE(svint8) op) MODE_ATTR { @@ -2980,18 +7255,51 @@ TYPE(svuint32) test_svreinterpret_u32_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], 
[[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -3000,18 +7308,51 @@ TYPE(svuint32) test_svreinterpret_u32_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] 
= extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_s16(TYPE(svint16) op) MODE_ATTR { @@ -3024,15 +7365,42 @@ TYPE(svuint32) test_svreinterpret_u32_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_u32_s32( // TUPLE3-NEXT: entry: -// 
TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_u32_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -3040,15 +7408,42 @@ TYPE(svuint32) test_svreinterpret_u32_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_s3211svint32x4_t( 
// CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svuint32) test_svreinterpret_u32_s32(TYPE(svint32) op) MODE_ATTR { @@ -3062,18 +7457,51 @@ TYPE(svuint32) test_svreinterpret_u32_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// 
TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -3082,18 +7510,51 @@ TYPE(svuint32) test_svreinterpret_u32_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: 
[[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_s64(TYPE(svint64) op) MODE_ATTR { @@ -3107,18 +7568,51 @@ TYPE(svuint32) test_svreinterpret_u32_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: 
[[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u32_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -3127,18 +7621,51 @@ TYPE(svuint32) test_svreinterpret_u32_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u32_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u32_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u32_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// 
CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_u8(TYPE(svuint8) op) MODE_ATTR { @@ -3152,18 +7679,51 @@ TYPE(svuint32) test_svreinterpret_u32_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = 
insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -3172,18 +7732,51 @@ TYPE(svuint32) test_svreinterpret_u32_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], 
[[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_u16(TYPE(svuint16) op) MODE_ATTR { @@ -3196,15 +7789,42 @@ TYPE(svuint32) test_svreinterpret_u32_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_u32_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_u32_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -3212,15 +7832,42 @@ TYPE(svuint32) test_svreinterpret_u32_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: 
@_Z26test_svreinterpret_u32_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svuint32) test_svreinterpret_u32_u32(TYPE(svuint32) op) MODE_ATTR { @@ -3234,18 +7881,51 @@ TYPE(svuint32) test_svreinterpret_u32_u32(TYPE(svuint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 
+// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -3254,18 +7934,51 @@ TYPE(svuint32) test_svreinterpret_u32_u32(TYPE(svuint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: 
[[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_u64(TYPE(svuint64) op) MODE_ATTR { @@ -3279,18 +7992,51 @@ TYPE(svuint32) test_svreinterpret_u32_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: 
ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -3299,18 +8045,51 @@ TYPE(svuint32) test_svreinterpret_u32_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// 
CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -3324,18 +8103,51 @@ TYPE(svuint32) test_svreinterpret_u32_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], 
[[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -3344,18 +8156,51 @@ TYPE(svuint32) test_svreinterpret_u32_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } 
[[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -3369,18 +8214,51 @@ TYPE(svuint32) test_svreinterpret_u32_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u32_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u32_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u32_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// 
TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u32_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -3389,18 +8267,51 @@ TYPE(svuint32) test_svreinterpret_u32_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u32_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u32_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u32_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, 
[[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint32) test_svreinterpret_u32_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -3414,18 +8325,51 @@ TYPE(svuint32) test_svreinterpret_u32_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } 
[[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u64_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -3434,18 +8378,51 @@ TYPE(svuint32) test_svreinterpret_u32_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u64_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u64_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u64_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] 
to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_s8(TYPE(svint8) op) MODE_ATTR { @@ -3459,18 +8436,51 @@ TYPE(svuint64) test_svreinterpret_u64_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// 
TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -3479,18 +8489,51 @@ TYPE(svuint64) test_svreinterpret_u64_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] 
// TYPE(svuint64) test_svreinterpret_u64_s16(TYPE(svint16) op) MODE_ATTR { @@ -3504,18 +8547,51 @@ TYPE(svuint64) test_svreinterpret_u64_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -3524,18 +8600,51 @@ TYPE(svuint64) test_svreinterpret_u64_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: 
[[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_s32(TYPE(svint32) op) MODE_ATTR { @@ -3548,15 +8657,42 @@ TYPE(svuint64) test_svreinterpret_u64_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_u64_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_u64_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -3564,15 +8700,42 @@ TYPE(svuint64) test_svreinterpret_u64_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 
0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svuint64) test_svreinterpret_u64_s64(TYPE(svint64) op) MODE_ATTR { @@ -3586,18 +8749,51 @@ TYPE(svuint64) test_svreinterpret_u64_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: 
[[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_u64_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -3606,18 +8802,51 @@ TYPE(svuint64) test_svreinterpret_u64_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_u64_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_u64_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_u64_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = 
insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_u8(TYPE(svuint8) op) MODE_ATTR { @@ -3631,18 +8860,51 @@ TYPE(svuint64) test_svreinterpret_u64_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: 
[[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -3651,18 +8913,51 @@ TYPE(svuint64) test_svreinterpret_u64_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// 
CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_u16(TYPE(svuint16) op) MODE_ATTR { @@ -3676,18 +8971,51 @@ TYPE(svuint64) test_svreinterpret_u64_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 
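// NOTE: Every tuple check in this file follows the same shape: the coerced
// scalable-vector arguments are packed into a literal struct, and each element
// is then reinterpreted on its own. A minimal fully-typed sketch of the
// two-element u32 -> u64 case (the value names %t*, %e*, %b*, %r* are
// illustrative only, not taken from the generated checks):
//
//   %t0 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> %op.coerce0, 0
//   %t1 = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %t0, <vscale x 4 x i32> %op.coerce1, 1
//   %e0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %t1, 0
//   %b0 = bitcast <vscale x 4 x i32> %e0 to <vscale x 2 x i64>
//   %r0 = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> %b0, 0
//   %e1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %t1, 1
//   %b1 = bitcast <vscale x 4 x i32> %e1 to <vscale x 2 x i64>
//   %r1 = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %r0, <vscale x 2 x i64> %b1, 1
//   ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %r1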
+// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 4 x i32> [[TMP7]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP8]], 1
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 4 x i32> [[TMP10]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 4 x i32> [[TMP13]] to <vscale x 2 x i64>
+// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_u32u12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
@@ -3696,18 +9024,51 @@ TYPE(svuint64) test_svreinterpret_u64_u16(TYPE(svuint16) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_u3212svuint32x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 8 x i32> [[OP:%.*]] to <vscale x 4 x i64>
-// CPP-TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 4 x i32> [[TMP2]] to <vscale x 2 x i64>
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 4 x i32> [[TMP5]] to <vscale x 2 x i64>
+// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP6]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_u3212svuint32x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 12 x i32> [[OP:%.*]] to <vscale x 6 x i64>
-// CPP-TUPLE3-NEXT: ret <vscale x 6 x i64> [[TMP0]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast <vscale x 4 x i32> [[TMP3]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast <vscale x 4 x i32> [[TMP6]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP7]], 1
+// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast <vscale x 4 x i32> [[TMP9]] to <vscale x 2 x i64>
+// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]], <vscale x 2 x i64> [[TMP10]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_u3212svuint32x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i32> [[OP:%.*]] to <vscale x 8 x i64>
-// CPP-TUPLE4-NEXT: ret <vscale x 8 x i64> [[TMP0]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], <vscale x 4 x i32> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast <vscale x 4 x i32> [[TMP4]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP5]], 0
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast <vscale x 4 x i32> [[TMP7]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP8]], 1
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast <vscale x 4 x i32> [[TMP10]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP11]], 2
+// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast <vscale x 4 x i32> [[TMP13]] to <vscale x 2 x i64>
+// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP12]], <vscale x 2 x i64> [[TMP14]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP15]]
//
TYPE(svuint64) test_svreinterpret_u64_u32(TYPE(svuint32) op) MODE_ATTR
{
@@ -3720,15 +9081,42 @@ TYPE(svuint64) test_svreinterpret_u64_u32(TYPE(svuint32) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_u64_u64(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// TUPLE3-LABEL: @test_svreinterpret_u64_u64(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// TUPLE4-LABEL: @test_svreinterpret_u64_u64(
// TUPLE4-NEXT: entry:
-// TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
// CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_u64u12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
@@ -3736,15 +9124,42 @@ TYPE(svuint64) test_svreinterpret_u64_u32(TYPE(svuint32) op) MODE_ATTR
//
// CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_u6412svuint64x2_t(
// CPP-TUPLE2-NEXT: entry:
-// CPP-TUPLE2-NEXT: ret <vscale x 4 x i64> [[OP:%.*]]
+// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP2]], 0
+// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], <vscale x 2 x i64> [[TMP4]], 1
+// CPP-TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]]
//
// CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_u6412svuint64x3_t(
// CPP-TUPLE3-NEXT: entry:
-// CPP-TUPLE3-NEXT: ret <vscale x 6 x i64> [[OP:%.*]]
+// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 0
+// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 1
+// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP5]], 1
+// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], 2
+// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP6]], <vscale x 2 x i64> [[TMP7]], 2
+// CPP-TUPLE3-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP8]]
//
// CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_u6412svuint64x4_t(
// CPP-TUPLE4-NEXT: entry:
-// CPP-TUPLE4-NEXT: ret <vscale x 8 x i64> [[OP:%.*]]
+// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[OP_COERCE0:%.*]], 0
+// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[OP_COERCE1:%.*]], 1
+// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[OP_COERCE2:%.*]], 2
+// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], <vscale x 2 x i64> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP5]], <vscale x 2 x i64> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]], <vscale x 2 x i64> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP9]], <vscale x 2 x i64> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP11]]
//
TYPE(svuint64) test_svreinterpret_u64_u64(TYPE(svuint64) op) MODE_ATTR
{
@@ -3758,18 +9173,51 @@ TYPE(svuint64) test_svreinterpret_u64_u64(TYPE(svuint64) op) MODE_ATTR
//
// TUPLE2-LABEL: @test_svreinterpret_u64_f16(
// TUPLE2-NEXT: entry:
-// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x half> [[OP:%.*]] to <vscale x 4 x i64>
-// TUPLE2-NEXT: ret <vscale x 4 x i64> [[TMP0]]
+// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> [[OP_COERCE0:%.*]], 0
+// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], <vscale x 8 x half> [[OP_COERCE1:%.*]], 1
+// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast <vscale x 8 x half> [[TMP2]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[TMP3]], 0
+// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast <vscale x 8 x half> [[TMP5]] to <vscale x 2 x i64>
+// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], <vscale x 2 x i64> [[TMP6]], 1
+// TUPLE2-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP7]]
//
// TUPLE3-LABEL: @test_svreinterpret_u64_f16(
// TUPLE3-NEXT: entry:
-// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast <vscale x 24 x half> [[OP:%.*]] to <vscale x 6 x i64>
-// TUPLE3-NEXT: ret <vscale x 6 x i64> [[TMP0]]
+// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>
} poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -3778,18 +9226,51 @@ TYPE(svuint64) test_svreinterpret_u64_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } 
[[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -3803,18 +9284,51 @@ TYPE(svuint64) test_svreinterpret_u64_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: 
[[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -3823,18 +9337,51 @@ TYPE(svuint64) test_svreinterpret_u64_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] 
= bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -3848,18 +9395,51 @@ TYPE(svuint64) test_svreinterpret_u64_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_u64_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_u64_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: 
[[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_u64_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_u64_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -3868,18 +9448,51 @@ TYPE(svuint64) test_svreinterpret_u64_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_u64_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_u64_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: 
ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_u64_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svuint64) test_svreinterpret_u64_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -3893,18 +9506,51 @@ TYPE(svuint64) test_svreinterpret_u64_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f16_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f16_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f16_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] 
= insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_f16_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -3913,18 +9559,51 @@ TYPE(svuint64) test_svreinterpret_u64_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_f16_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_f16_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_f16_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , 
, , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat16) test_svreinterpret_f16_s8(TYPE(svint8) op) MODE_ATTR { @@ -3938,18 +9617,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f16_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f16_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f16_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue 
{ , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f16_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -3958,18 +9670,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f16_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f16_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f16_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// 
CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat16) test_svreinterpret_f16_s16(TYPE(svint16) op) MODE_ATTR { @@ -3983,18 +9728,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f16_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f16_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f16_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 
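// NOTE: In the f16 cases no lane count changes: <vscale x 8 x i16> and
// <vscale x 8 x half> hold the same number of lanes, so each per-element
// bitcast only reinterprets the lane type. A minimal sketch for one element
// of an x2 tuple, assuming the same per-element lowering as above
// (illustrative value names):
//
//   %e = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %op, 0
//   %h = bitcast <vscale x 8 x i16> %e to <vscale x 8 x half>
//   %r = insertvalue { <vscale x 8 x half>, <vscale x 8 x half> } poison, <vscale x 8 x half> %h, 0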
+// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f16_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -4003,18 +9781,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f16_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f16_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f16_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } 
[[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat16) test_svreinterpret_f16_s32(TYPE(svint32) op) MODE_ATTR { @@ -4028,18 +9839,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f16_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f16_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f16_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: 
@@ -4048,18 +9892,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s32(TYPE(svint32) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_s6411svint64x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_s64(TYPE(svint64) op) MODE_ATTR
 {
@@ -4073,18 +9950,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s64(TYPE(svint64) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 16 x i8> source and <vscale x 8 x half> result elements]
@@ -4093,18 +10003,51 @@ TYPE(svfloat16) test_svreinterpret_f16_s64(TYPE(svint64) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z25test_svreinterpret_f16_u811svuint8x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_u8(TYPE(svuint8) op) MODE_ATTR
 {
@@ -4118,18 +10061,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u8(TYPE(svuint8) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 8 x i16> source and <vscale x 8 x half> result elements]
@@ -4138,18 +10114,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u8(TYPE(svuint8) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_u1612svuint16x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_u16(TYPE(svuint16) op) MODE_ATTR
 {
@@ -4163,18 +10172,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u16(TYPE(svuint16) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 4 x i32> source and <vscale x 8 x half> result elements]
@@ -4183,18 +10225,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u16(TYPE(svuint16) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_u3212svuint32x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_u32(TYPE(svuint32) op) MODE_ATTR
 {
@@ -4208,18 +10283,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u32(TYPE(svuint32) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 2 x i64> source and <vscale x 8 x half> result elements]
@@ -4228,18 +10336,51 @@ TYPE(svfloat16) test_svreinterpret_f16_u32(TYPE(svuint32) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_u6412svuint64x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_u64(TYPE(svuint64) op) MODE_ATTR
 {
@@ -4252,15 +10393,42 @@ TYPE(svfloat16) test_svreinterpret_f16_u64(TYPE(svuint64) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks: the former identity "ret [[OP]]" checks are replaced by per-element extractvalue/insertvalue sequences without bitcasts, since source and result elements are both <vscale x 8 x half>]
@@ -4268,15 +10436,42 @@ TYPE(svfloat16) test_svreinterpret_f16_u64(TYPE(svuint64) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_f1613svfloat16x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_f16(TYPE(svfloat16) op) MODE_ATTR
 {
@@ -4290,18 +10485,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f16(TYPE(svfloat16) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 4 x float> source and <vscale x 8 x half> result elements]
@@ -4310,18 +10538,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f16(TYPE(svfloat16) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_f3213svfloat32x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_f32(TYPE(svfloat32) op) MODE_ATTR
 {
@@ -4335,18 +10596,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f32(TYPE(svfloat32) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 2 x double> source and <vscale x 8 x half> result elements]
@@ -4355,18 +10649,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f32(TYPE(svfloat32) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f16_f6413svfloat64x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat16) test_svreinterpret_f16_f64(TYPE(svfloat64) op) MODE_ATTR
 {
@@ -4380,18 +10707,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f64(TYPE(svfloat64) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 16 x i8> source and <vscale x 4 x float> result elements]
@@ -4400,18 +10760,51 @@ TYPE(svfloat16) test_svreinterpret_f16_f64(TYPE(svfloat64) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z25test_svreinterpret_f32_s810svint8x2_t and the corresponding x3/x4 labels]
 //
 TYPE(svfloat32) test_svreinterpret_f32_s8(TYPE(svint8) op) MODE_ATTR
 {
@@ -4425,18 +10818,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s8(TYPE(svint8) op) MODE_ATTR
 [TUPLE2/TUPLE3/TUPLE4 checks follow the same per-element pattern, with <vscale x 8 x i16> source and <vscale x 4 x float> result elements]
@@ -4445,18 +10871,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s8(TYPE(svint8) op) MODE_ATTR
 [CPP-TUPLE2/CPP-TUPLE3/CPP-TUPLE4 checks updated identically, under @_Z26test_svreinterpret_f32_s1611svint16x2_t and the corresponding x3/x4 labels]
[[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_s16(TYPE(svint16) op) MODE_ATTR { @@ -4470,18 +10929,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 
0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -4490,18 +10982,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } 
[[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_s32(TYPE(svint32) op) MODE_ATTR { @@ -4515,18 +11040,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast 
[[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -4535,18 +11093,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } 
[[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_s64(TYPE(svint64) op) MODE_ATTR { @@ -4560,18 +11151,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , 
} [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_f32_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -4580,18 +11204,51 @@ TYPE(svfloat32) test_svreinterpret_f32_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_f32_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_f32_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: 
[[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_f32_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_u8(TYPE(svuint8) op) MODE_ATTR { @@ -4605,18 +11262,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// 
TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -4625,18 +11315,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: 
[[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_u16(TYPE(svuint16) op) MODE_ATTR { @@ -4650,18 +11373,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = 
bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -4670,18 +11426,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u16(TYPE(svuint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = 
insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_u32(TYPE(svuint32) op) MODE_ATTR { @@ -4695,18 +11484,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u32(TYPE(svuint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: 
[[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -4715,18 +11537,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u32(TYPE(svuint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// 
CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_u64(TYPE(svuint64) op) MODE_ATTR { @@ -4740,18 +11595,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 
+// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -4760,18 +11648,51 @@ TYPE(svfloat32) test_svreinterpret_f32_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// 
CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -4784,15 +11705,42 @@ TYPE(svfloat32) test_svreinterpret_f32_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_f32_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_f32_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -4800,15 +11748,42 @@ TYPE(svfloat32) test_svreinterpret_f32_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, 
[[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP11]] // TYPE(svfloat32) test_svreinterpret_f32_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -4822,18 +11797,51 @@ TYPE(svfloat32) test_svreinterpret_f32_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f32_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f32_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue 
{ , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f32_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f32_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -4842,18 +11850,51 @@ TYPE(svfloat32) test_svreinterpret_f32_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f32_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f32_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { 
, , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f32_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat32) test_svreinterpret_f32_f64(TYPE(svfloat64) op) MODE_ATTR { @@ -4867,18 +11908,51 @@ TYPE(svfloat32) test_svreinterpret_f32_f64(TYPE(svfloat64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_s8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_s8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// 
TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_s8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_f64_s8u10__SVInt8_t( // CPP-CHECK-NEXT: entry: @@ -4887,18 +11961,51 @@ TYPE(svfloat32) test_svreinterpret_f32_f64(TYPE(svfloat64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_f64_s810svint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_f64_s810svint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: 
[[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_f64_s810svint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_s8(TYPE(svint8) op) MODE_ATTR { @@ -4912,18 +12019,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s8(TYPE(svint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_s16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_s16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: 
[[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_s16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_s16u11__SVInt16_t( // CPP-CHECK-NEXT: entry: @@ -4932,18 +12072,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s8(TYPE(svint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_s1611svint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_s1611svint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , 
, } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_s1611svint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_s16(TYPE(svint16) op) MODE_ATTR { @@ -4957,18 +12130,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s16(TYPE(svint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_s32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_s32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_s32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = 
insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_s32u11__SVInt32_t( // CPP-CHECK-NEXT: entry: @@ -4977,18 +12183,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s16(TYPE(svint16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_s3211svint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_s3211svint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_s3211svint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = 
insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_s32(TYPE(svint32) op) MODE_ATTR { @@ -5002,18 +12241,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s32(TYPE(svint32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_s64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_s64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_s64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: 
[[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_s64u11__SVInt64_t( // CPP-CHECK-NEXT: entry: @@ -5022,18 +12294,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s32(TYPE(svint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_s6411svint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_s6411svint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_s6411svint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: 
[[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_s64(TYPE(svint64) op) MODE_ATTR { @@ -5047,18 +12352,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s64(TYPE(svint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_u8( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_u8( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_u8( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = 
insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z25test_svreinterpret_f64_u8u11__SVUint8_t( // CPP-CHECK-NEXT: entry: @@ -5067,18 +12405,51 @@ TYPE(svfloat64) test_svreinterpret_f64_s64(TYPE(svint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z25test_svreinterpret_f64_u811svuint8x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z25test_svreinterpret_f64_u811svuint8x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z25test_svreinterpret_f64_u811svuint8x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: 
[[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_u8(TYPE(svuint8) op) MODE_ATTR { @@ -5092,18 +12463,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u8(TYPE(svuint8) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_u16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_u16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_u16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = 
bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_u16u12__SVUint16_t( // CPP-CHECK-NEXT: entry: @@ -5112,18 +12516,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u8(TYPE(svuint8) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_u1612svuint16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_u1612svuint16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_u1612svuint16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// 
CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_u16(TYPE(svuint16) op) MODE_ATTR { @@ -5137,18 +12574,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u16(TYPE(svuint16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_u32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_u32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_u32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_u32u12__SVUint32_t( // CPP-CHECK-NEXT: entry: @@ -5157,18 +12627,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u16(TYPE(svuint16) op) 
MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_u3212svuint32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_u3212svuint32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_u3212svuint32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_u32(TYPE(svuint32) op) MODE_ATTR { @@ -5182,18 +12685,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u32(TYPE(svuint32) op) MODE_ATTR // // TUPLE2-LABEL: 
@test_svreinterpret_f64_u64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_u64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_u64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_u64u12__SVUint64_t( // CPP-CHECK-NEXT: entry: @@ -5202,18 +12738,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u32(TYPE(svuint32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_u6412svuint64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] 
= insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_u6412svuint64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_u6412svuint64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_u64(TYPE(svuint64) op) MODE_ATTR { @@ -5227,18 +12796,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u64(TYPE(svuint64) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_f16( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = 
extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_f16( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_f16( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_f16u13__SVFloat16_t( // CPP-CHECK-NEXT: entry: @@ -5247,18 +12849,51 @@ TYPE(svfloat64) test_svreinterpret_f64_u64(TYPE(svuint64) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_f1613svfloat16x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_f1613svfloat16x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_f1613svfloat16x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_f16(TYPE(svfloat16) op) MODE_ATTR { @@ -5272,18 +12907,51 @@ TYPE(svfloat64) test_svreinterpret_f64_f16(TYPE(svfloat16) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_f32( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE2-NEXT: ret [[TMP0]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } 
[[TMP4]], [[TMP6]], 1 +// TUPLE2-NEXT: ret { , } [[TMP7]] // // TUPLE3-LABEL: @test_svreinterpret_f64_f32( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE3-NEXT: ret [[TMP0]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP11]] // // TUPLE4-LABEL: @test_svreinterpret_f64_f32( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// TUPLE4-NEXT: ret [[TMP0]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP15]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_f32u13__SVFloat32_t( // CPP-CHECK-NEXT: entry: @@ -5292,18 +12960,51 @@ TYPE(svfloat64) test_svreinterpret_f64_f16(TYPE(svfloat16) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_f3213svfloat32x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE2-NEXT: ret [[TMP0]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = bitcast [[TMP2]] to +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = insertvalue { , } poison, [[TMP3]], 0 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP6:%.*]] = bitcast [[TMP5]] to +// CPP-TUPLE2-NEXT: [[TMP7:%.*]] = insertvalue { , } [[TMP4]], [[TMP6]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP7]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_f3213svfloat32x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to 
-// CPP-TUPLE3-NEXT: ret [[TMP0]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = bitcast [[TMP3]] to +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = insertvalue { , , } poison, [[TMP4]], 0 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = bitcast [[TMP6]] to +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP5]], [[TMP7]], 1 +// CPP-TUPLE3-NEXT: [[TMP9:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP10:%.*]] = bitcast [[TMP9]] to +// CPP-TUPLE3-NEXT: [[TMP11:%.*]] = insertvalue { , , } [[TMP8]], [[TMP10]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP11]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_f3213svfloat32x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = bitcast [[OP:%.*]] to -// CPP-TUPLE4-NEXT: ret [[TMP0]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = bitcast [[TMP4]] to +// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[TMP5]], 0 +// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = bitcast [[TMP7]] to +// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[TMP8]], 1 +// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = bitcast [[TMP10]] to +// CPP-TUPLE4-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP11]], 2 +// CPP-TUPLE4-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-TUPLE4-NEXT: [[TMP14:%.*]] = bitcast [[TMP13]] to +// CPP-TUPLE4-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[TMP14]], 3 +// CPP-TUPLE4-NEXT: ret { , , , } [[TMP15]] // TYPE(svfloat64) test_svreinterpret_f64_f32(TYPE(svfloat32) op) MODE_ATTR { @@ -5316,15 +13017,42 @@ TYPE(svfloat64) test_svreinterpret_f64_f32(TYPE(svfloat32) op) MODE_ATTR // // TUPLE2-LABEL: @test_svreinterpret_f64_f64( // TUPLE2-NEXT: entry: -// TUPLE2-NEXT: ret [[OP:%.*]] +// TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// TUPLE2-NEXT: ret { , } [[TMP5]] // // TUPLE3-LABEL: @test_svreinterpret_f64_f64( // TUPLE3-NEXT: entry: -// TUPLE3-NEXT: ret [[OP:%.*]] +// TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// 
TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// TUPLE3-NEXT: ret { , , } [[TMP8]] // // TUPLE4-LABEL: @test_svreinterpret_f64_f64( // TUPLE4-NEXT: entry: -// TUPLE4-NEXT: ret [[OP:%.*]] +// TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[OP_COERCE3:%.*]], 3 +// TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 +// TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 +// TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 +// TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 +// TUPLE4-NEXT: ret { , , , } [[TMP11]] // // CPP-CHECK-LABEL: @_Z26test_svreinterpret_f64_f64u13__SVFloat64_t( // CPP-CHECK-NEXT: entry: @@ -5332,15 +13060,42 @@ TYPE(svfloat64) test_svreinterpret_f64_f32(TYPE(svfloat32) op) MODE_ATTR // // CPP-TUPLE2-LABEL: @_Z26test_svreinterpret_f64_f6413svfloat64x2_t( // CPP-TUPLE2-NEXT: entry: -// CPP-TUPLE2-NEXT: ret [[OP:%.*]] +// CPP-TUPLE2-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE2-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE2-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-TUPLE2-NEXT: [[TMP3:%.*]] = insertvalue { , } poison, [[TMP2]], 0 +// CPP-TUPLE2-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-TUPLE2-NEXT: [[TMP5:%.*]] = insertvalue { , } [[TMP3]], [[TMP4]], 1 +// CPP-TUPLE2-NEXT: ret { , } [[TMP5]] // // CPP-TUPLE3-LABEL: @_Z26test_svreinterpret_f64_f6413svfloat64x3_t( // CPP-TUPLE3-NEXT: entry: -// CPP-TUPLE3-NEXT: ret [[OP:%.*]] +// CPP-TUPLE3-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE3-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE3-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// CPP-TUPLE3-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-TUPLE3-NEXT: [[TMP4:%.*]] = insertvalue { , , } poison, [[TMP3]], 0 +// CPP-TUPLE3-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-TUPLE3-NEXT: [[TMP6:%.*]] = insertvalue { , , } [[TMP4]], [[TMP5]], 1 +// CPP-TUPLE3-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-TUPLE3-NEXT: [[TMP8:%.*]] = insertvalue { , , } [[TMP6]], [[TMP7]], 2 +// CPP-TUPLE3-NEXT: ret { , , } [[TMP8]] // // CPP-TUPLE4-LABEL: @_Z26test_svreinterpret_f64_f6413svfloat64x4_t( // CPP-TUPLE4-NEXT: entry: -// CPP-TUPLE4-NEXT: ret [[OP:%.*]] +// CPP-TUPLE4-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[OP_COERCE0:%.*]], 0 +// CPP-TUPLE4-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[OP_COERCE1:%.*]], 1 +// CPP-TUPLE4-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[OP_COERCE2:%.*]], 2 +// 
CPP-TUPLE4-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[OP_COERCE3:%.*]], 3
+// CPP-TUPLE4-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
+// CPP-TUPLE4-NEXT: [[TMP5:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[TMP4]], 0
+// CPP-TUPLE4-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
+// CPP-TUPLE4-NEXT: [[TMP7:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP5]], <vscale x 2 x double> [[TMP6]], 1
+// CPP-TUPLE4-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
+// CPP-TUPLE4-NEXT: [[TMP9:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP7]], <vscale x 2 x double> [[TMP8]], 2
+// CPP-TUPLE4-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 3
+// CPP-TUPLE4-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP9]], <vscale x 2 x double> [[TMP10]], 3
+// CPP-TUPLE4-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP11]]
//
TYPE(svfloat64) test_svreinterpret_f64_f64(TYPE(svfloat64) op) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
index 7298666b3b1db1..edd30278a97147 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
@@ -24,13 +24,17 @@
// CHECK-LABEL: @test_svset2_bf16_0(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[X:%.*]], 0
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_014svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 0)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[X:%.*]], 0
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR
{
@@ -39,13 +43,17 @@ svbfloat16x2_t test_svset2_bf16_0(svbfloat16x2_t tuple, svbfloat16_t x) ATTR
// CHECK-LABEL: @test_svset2_bf16_1(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[X:%.*]], 1
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z18test_svset2_bf16_114svbfloat16x2_tu14__SVBfloat16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TUPLE:%.*]], <vscale x 8 x bfloat> [[X:%.*]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[X:%.*]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]]
//
svbfloat16x2_t test_svset2_bf16_1(svbfloat16x2_t tuple, svbfloat16_t x) ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
index 71527c321f1e22..bc53d327f9efd5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
@@ -24,13 +24,17 @@
// CHECK-LABEL: @test_svset2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TUPLE:%.*]], <vscale x 16 x i8> [[X:%.*]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X:%.*]], 1
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svset2_s810svint8x2_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TUPLE:%.*]], <vscale x 16 x i8> [[X:%.*]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[X:%.*]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]]
//
svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) ATTR
{
@@ -39,13 +43,17 @@ svint8x2_t test_svset2_s8(svint8x2_t tuple, svint8_t x) ATTR
// CHECK-LABEL: @test_svset2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TUPLE:%.*]], <vscale x 8 x i16> [[X:%.*]], i64 0)
-// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X:%.*]], 0
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z15test_svset2_s1611svint16x2_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TUPLE:%.*]], <vscale x 8 x i16> [[X:%.*]], i64 0)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[X:%.*]], 0
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]]
//
svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) ATTR
{
@@ -54,13 +62,17 @@ svint16x2_t test_svset2_s16(svint16x2_t tuple, svint16_t x) ATTR
// CHECK-LABEL: @test_svset2_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TUPLE:%.*]], <vscale x 4 x i32> [[X:%.*]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X:%.*]], 1
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z15test_svset2_s3211svint32x2_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TUPLE:%.*]], <vscale x 4 x i32> [[X:%.*]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[TUPLE_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[TUPLE_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[X:%.*]], 1
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>
} [[TMP2]] // svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) ATTR { @@ -69,13 +81,17 @@ svint32x2_t test_svset2_s32(svint32x2_t tuple, svint32_t x) ATTR // CHECK-LABEL: @test_svset2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_s6411svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) ATTR { @@ -84,13 +100,17 @@ svint64x2_t test_svset2_s64(svint64x2_t tuple, svint64_t x) ATTR // CHECK-LABEL: @test_svset2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z14test_svset2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) ATTR { @@ -99,13 +119,17 @@ svuint8x2_t test_svset2_u8(svuint8x2_t tuple, svuint8_t x) ATTR // CHECK-LABEL: @test_svset2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t 
x) ATTR { @@ -114,13 +138,17 @@ svuint16x2_t test_svset2_u16(svuint16x2_t tuple, svuint16_t x) ATTR // CHECK-LABEL: @test_svset2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) ATTR { @@ -129,13 +157,17 @@ svuint32x2_t test_svset2_u32(svuint32x2_t tuple, svuint32_t x) ATTR // CHECK-LABEL: @test_svset2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) ATTR { @@ -144,13 +176,17 @@ svuint64x2_t test_svset2_u64(svuint64x2_t tuple, svuint64_t x) ATTR // CHECK-LABEL: @test_svset2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_f1613svfloat16x2_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) ATTR { @@ -159,13 +195,17 @@ 
svfloat16x2_t test_svset2_f16(svfloat16x2_t tuple, svfloat16_t x) ATTR // CHECK-LABEL: @test_svset2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_f3213svfloat32x2_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) ATTR { @@ -174,13 +214,17 @@ svfloat32x2_t test_svset2_f32(svfloat32x2_t tuple, svfloat32_t x) ATTR // CHECK-LABEL: @test_svset2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_f6413svfloat64x2_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svfloat64x2_t test_svset2_f64(svfloat64x2_t tuple, svfloat64_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c index 72cd7e9dc001ad..35cf63c44b6aa8 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c @@ -24,13 +24,19 @@ // CHECK-LABEL: @test_svset3_bf16_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svset3_bf16_014svbfloat16x3_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { @@ -39,13 +45,19 @@ svbfloat16x3_t test_svset3_bf16_0(svbfloat16x3_t tuple, svbfloat16_t x) ATTR // CHECK-LABEL: @test_svset3_bf16_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svset3_bf16_114svbfloat16x3_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { @@ -54,13 +66,19 @@ svbfloat16x3_t test_svset3_bf16_1(svbfloat16x3_t tuple, svbfloat16_t x) ATTR // CHECK-LABEL: @test_svset3_bf16_2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z18test_svset3_bf16_214svbfloat16x3_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svbfloat16x3_t test_svset3_bf16_2(svbfloat16x3_t tuple, svbfloat16_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c index 17a808f72edac5..db38f840e75b62 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c @@ -26,13 +26,19 @@ // 
several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svset3_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z14test_svset3_s810svint8x3_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) ATTR { @@ -41,13 +47,19 @@ svint8x3_t test_svset3_s8(svint8x3_t tuple, svint8_t x) ATTR // CHECK-LABEL: @test_svset3_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_s1611svint16x3_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) ATTR { @@ -56,13 +68,19 @@ svint16x3_t test_svset3_s16(svint16x3_t tuple, svint16_t x) ATTR // CHECK-LABEL: @test_svset3_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_s3211svint32x3_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// 
CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) ATTR { @@ -71,13 +89,19 @@ svint32x3_t test_svset3_s32(svint32x3_t tuple, svint32_t x) ATTR // CHECK-LABEL: @test_svset3_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_s6411svint64x3_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) ATTR { @@ -86,13 +110,19 @@ svint64x3_t test_svset3_s64(svint64x3_t tuple, svint64_t x) ATTR // CHECK-LABEL: @test_svset3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 32) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z14test_svset3_u811svuint8x3_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv48i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 32) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) ATTR { @@ -101,13 +131,19 @@ svuint8x3_t test_svset3_u8(svuint8x3_t tuple, svuint8_t x) ATTR // CHECK-LABEL: @test_svset3_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 
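// Illustrative aside (hypothetical helpers, not taken from the autogenerated
// checks in this file): under the struct-based tuple representation these
// tests exercise, an svuint16x3_t is a literal struct of three
// <vscale x 8 x i16> members, so svset3/svget3 should lower to a single
// insertvalue/extractvalue rather than @llvm.vector.insert/@llvm.vector.extract
// on a <vscale x 24 x i16> concatenation. The helpers assume this file's
// arm_sve.h include and ATTR macro.
svuint16x3_t set_middle_u16(svuint16x3_t tuple, svuint16_t x) ATTR
{
  // Expected IR: one insertvalue at struct index 1.
  return svset3_u16(tuple, 1, x);
}

svuint16_t get_middle_u16(svuint16x3_t tuple) ATTR
{
  // Expected IR: one extractvalue at struct index 1.
  return svget3_u16(tuple, 1);
}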
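// A note on the shape of the new checks: the leading insertvalue lines only
// rebuild the tuple struct from the coerced per-member arguments
// ([[TUPLE_COERCE0]], [[TUPLE_COERCE1]], ...); the one insertvalue that takes
// [[X]] is the svset operation itself. In the deleted checks the member index
// was instead encoded in the i64 offset of @llvm.vector.insert: for example,
// i64 16 selected member 1 of an svint8x2_t, since each <vscale x 16 x i8>
// member has a minimum of 16 lanes.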
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_u1612svuint16x3_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) ATTR { @@ -116,13 +152,19 @@ svuint16x3_t test_svset3_u16(svuint16x3_t tuple, svuint16_t x) ATTR // CHECK-LABEL: @test_svset3_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_u3212svuint32x3_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) ATTR { @@ -131,13 +173,19 @@ svuint32x3_t test_svset3_u32(svuint32x3_t tuple, svuint32_t x) ATTR // CHECK-LABEL: @test_svset3_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_u6412svuint64x3_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) ATTR { @@ -146,13 +194,19 @@ svuint64x3_t test_svset3_u64(svuint64x3_t tuple, svuint64_t x) ATTR // CHECK-LABEL: @test_svset3_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_f1613svfloat16x3_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv24f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) ATTR { @@ -161,13 +215,19 @@ svfloat16x3_t test_svset3_f16(svfloat16x3_t tuple, svfloat16_t x) ATTR // CHECK-LABEL: @test_svset3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_f3213svfloat32x3_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv12f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) ATTR { @@ -176,13 +236,19 @@ svfloat32x3_t test_svset3_f32(svfloat32x3_t tuple, svfloat32_t x) ATTR // CHECK-LABEL: @test_svset3_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , 
, } [[TMP3]] // // CPP-CHECK-LABEL: @_Z15test_svset3_f6413svfloat64x3_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv6f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , } [[TMP2]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , } [[TMP3]] // svfloat64x3_t test_svset3_f64(svfloat64x3_t tuple, svfloat64_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c index 49fcb15b9b3cd8..2f6035e6a88ccb 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c @@ -24,13 +24,21 @@ // CHECK-LABEL: @test_svset4_bf16_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svset4_bf16_014svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { @@ -39,13 +47,21 @@ svbfloat16x4_t test_svset4_bf16_0(svbfloat16x4_t tuple, svbfloat16_t x) ATTR // CHECK-LABEL: @test_svset4_bf16_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svset4_bf16_114svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], 
i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { @@ -54,13 +70,21 @@ svbfloat16x4_t test_svset4_bf16_1(svbfloat16x4_t tuple, svbfloat16_t x) ATTR // CHECK-LABEL: @test_svset4_bf16_2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svset4_bf16_214svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 2 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { @@ -69,13 +93,21 @@ svbfloat16x4_t test_svset4_bf16_2(svbfloat16x4_t tuple, svbfloat16_t x) ATTR // CHECK-LABEL: @test_svset4_bf16_3( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 24) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z18test_svset4_bf16_314svbfloat16x4_tu14__SVBfloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TUPLE:%.*]], [[X:%.*]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , 
, , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svbfloat16x4_t test_svset4_bf16_3(svbfloat16x4_t tuple, svbfloat16_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c index 7eec14b84a9f9f..c182052fa7e819 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c @@ -24,13 +24,21 @@ // CHECK-LABEL: @test_svset4_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z14test_svset4_s810svint8x4_tu10__SVInt8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) ATTR { @@ -39,13 +47,21 @@ svint8x4_t test_svset4_s8(svint8x4_t tuple, svint8_t x) ATTR // CHECK-LABEL: @test_svset4_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 24) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_s1611svint16x4_tu11__SVInt16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint16x4_t test_svset4_s16(svint16x4_t 
tuple, svint16_t x) ATTR { @@ -54,13 +70,21 @@ svint16x4_t test_svset4_s16(svint16x4_t tuple, svint16_t x) ATTR // CHECK-LABEL: @test_svset4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_s3211svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) ATTR { @@ -69,13 +93,21 @@ svint32x4_t test_svset4_s32(svint32x4_t tuple, svint32_t x) ATTR // CHECK-LABEL: @test_svset4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_s6411svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) ATTR { @@ -84,13 +116,21 @@ svint64x4_t test_svset4_s64(svint64x4_t tuple, svint64_t x) ATTR // CHECK-LABEL: @test_svset4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 48) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } 
[[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z14test_svset4_u811svuint8x4_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TUPLE:%.*]], [[X:%.*]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) ATTR { @@ -99,13 +139,21 @@ svuint8x4_t test_svset4_u8(svuint8x4_t tuple, svuint8_t x) ATTR // CHECK-LABEL: @test_svset4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_u1612svuint16x4_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) ATTR { @@ -114,13 +162,21 @@ svuint16x4_t test_svset4_u16(svuint16x4_t tuple, svuint16_t x) ATTR // CHECK-LABEL: @test_svset4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_u3212svuint32x4_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) ATTR { @@ -129,13 +185,21 @@ svuint32x4_t test_svset4_u32(svuint32x4_t tuple, svuint32_t x) ATTR // CHECK-LABEL: @test_svset4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 6) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_u6412svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TUPLE:%.*]], [[X:%.*]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) ATTR { @@ -144,13 +208,21 @@ svuint64x4_t test_svset4_u64(svuint64x4_t tuple, svuint64_t x) ATTR // CHECK-LABEL: @test_svset4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_f1613svfloat16x4_tu13__SVFloat16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TUPLE:%.*]], [[X:%.*]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) ATTR { @@ -159,13 +231,21 @@ svfloat16x4_t test_svset4_f16(svfloat16x4_t tuple, svfloat16_t x) ATTR // CHECK-LABEL: @test_svset4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_f3213svfloat32x4_tu13__SVFloat32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TUPLE:%.*]], [[X:%.*]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) ATTR { @@ -174,13 +254,21 @@ svfloat32x4_t test_svset4_f32(svfloat32x4_t tuple, svfloat32_t x) ATTR // CHECK-LABEL: @test_svset4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 6) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_f6413svfloat64x4_tu13__SVFloat64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TUPLE:%.*]], [[X:%.*]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat64x4_t test_svset4_f64(svfloat64x4_t tuple, svfloat64_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c index e2c4883f7a1c85..726aae2fa78a18 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c @@ -24,18 +24,22 @@ #endif // CHECK-LABEL: @test_svst2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2_bf16u10__SVBool_tPu6__bf1614svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) MODE_ATTR @@ -45,20 +49,24 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) MODE_AT // CHECK-LABEL: @test_svst2_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 
[[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst2_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) MODE_ATTR diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c index 9d87943dc35919..d1511b4c363d00 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c @@ -24,16 +24,20 @@ // CHECK-LABEL: @test_svst2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z13test_svst2_s8u10__SVBool_tPa10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data) MODE_ATTR 
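The replacement CHECK lines above capture the new lowering contract for two-vector SVE tuples: an svint8x2_t argument now arrives as two separately coerced scalable vectors ([[DATA_COERCE0]]/[[DATA_COERCE1]]), is reassembled into a two-element struct with insertvalue, and is split back out with extractvalue to feed @llvm.aarch64.sve.st2, replacing the old single concatenated vector that had to be carved up with @llvm.vector.extract at element offsets 0 and 16. A minimal source-level sketch of the pattern these tests compile (assuming <arm_sve.h> and compilation with SVE enabled, e.g. -march=armv8-a+sve; store_pair is a hypothetical helper, not part of the test suite):

#include <arm_sve.h>

void store_pair(svbool_t pg, int8_t *base, svint8_t lo, svint8_t hi) {
  // svcreate2_s8 builds the svint8x2_t tuple; under the struct-based lowering
  // this is the { <vscale x 16 x i8>, <vscale x 16 x i8> } value seen in the
  // CHECK lines above.
  svint8x2_t data = svcreate2_s8(lo, hi);
  // svst2_s8 stores the two vectors interleaved; codegen extracts both struct
  // fields and passes them, together with pg and base, to
  // @llvm.aarch64.sve.st2.nxv16i8.
  svst2_s8(pg, base, data);
}

For the i8 case the svbool_t predicate is used as-is; for wider elements (see test_svst2_s16 below) an extra @llvm.aarch64.sve.convert.from.svbool call narrows the predicate first, which is why those blocks carry one more TMP value.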
@@ -43,18 +47,22 @@ void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data) MODE_ATTR
// CHECK-LABEL: @test_svst2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst2_s16u10__SVBool_tPs11svint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) MODE_ATTR
@@ -64,18 +72,22 @@ void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) MODE_ATTR
// CHECK-LABEL: @test_svst2_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst2_s32u10__SVBool_tPi11svint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32>
@llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) MODE_ATTR @@ -85,18 +97,22 @@ void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_s64u10__SVBool_tPl11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data) MODE_ATTR @@ -106,16 +122,20 @@ void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], 
[[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z13test_svst2_u8u10__SVBool_tPh11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data) MODE_ATTR @@ -125,18 +145,22 @@ void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_u16u10__SVBool_tPt12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // 
void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) MODE_ATTR @@ -146,18 +170,22 @@ void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_u32u10__SVBool_tPj12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) MODE_ATTR @@ -167,18 +195,22 @@ void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_u64u10__SVBool_tPm12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) MODE_ATTR @@ -188,18 +220,22 @@ void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_f16u10__SVBool_tPDh13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) MODE_ATTR @@ -209,18 +245,22 @@ void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_f32u10__SVBool_tPf13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) MODE_ATTR @@ -230,18 +270,22 @@ void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2_f64u10__SVBool_tPd13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR @@ -251,18 +295,22 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR // CHECK-LABEL: @test_svst2_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst2_vnum_s8u10__SVBool_tPal10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data) MODE_ATTR @@ -272,20 +320,24 @@ void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data // CHECK-LABEL: @test_svst2_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: 
[[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s16u10__SVBool_tPsl11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data) MODE_ATTR @@ -295,20 +347,24 @@ void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t d // CHECK-LABEL: @test_svst2_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s32u10__SVBool_tPil11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr 
[[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4i32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data) MODE_ATTR @@ -318,20 +374,24 @@ void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t d // CHECK-LABEL: @test_svst2_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_s64u10__SVBool_tPll11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data) MODE_ATTR @@ -341,18 +401,22 @@ void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t d 
// CHECK-LABEL: @test_svst2_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst2_vnum_u8u10__SVBool_tPhl11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv16i8( [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data) MODE_ATTR @@ -362,20 +426,24 @@ void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t da // CHECK-LABEL: @test_svst2_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8i16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u16u10__SVBool_tPtl12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// 
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data) MODE_ATTR
@@ -385,20 +453,24 @@ void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t
 // CHECK-LABEL: @test_svst2_vnum_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[TMP3]])
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[TMP5]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u32u10__SVBool_tPjl12svuint32x2_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[DATA]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP2]],
[[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data) MODE_ATTR @@ -408,20 +480,24 @@ void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t // CHECK-LABEL: @test_svst2_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_u64u10__SVBool_tPml12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2i64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data) MODE_ATTR @@ -431,20 +507,24 @@ void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t // CHECK-LABEL: @test_svst2_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], 
[[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f16u10__SVBool_tPDhl13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data) MODE_ATTR @@ -454,20 +534,24 @@ void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2 // CHECK-LABEL: @test_svst2_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f32u10__SVBool_tPfl13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv4f32( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data) MODE_ATTR @@ -477,20 +561,24 @@ void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2 // CHECK-LABEL: @test_svst2_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst2_vnum_f64u10__SVBool_tPdl13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2.nxv2f64( [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) // CPP-CHECK-NEXT: ret void // void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data) MODE_ATTR diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c 
b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c index 9b7db79896e48d..2a71029a8f5734 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c @@ -25,20 +25,26 @@ // CHECK-LABEL: @test_svst3_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst3_bf16u10__SVBool_tPu6__bf1614svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8bf16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) MODE_ATTR @@ -48,22 +54,28 @@ void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) MODE_AT // CHECK-LABEL: @test_svst3_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( 
<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP6]], ptr [[TMP7]])
 // CHECK-NEXT:    ret void
 //
 // CPP-CHECK-LABEL: @_Z20test_svst3_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x3_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP6]], ptr [[TMP7]])
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data) MODE_ATTR
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
index 23da13f0e057e8..4198a325f5fb61 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
@@ -24,18 +24,24 @@
 // CHECK-LABEL: @test_svst3_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 16)
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv48i8(<vscale x 48 x i8> [[DATA]], i64 32)
-// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
[[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z13test_svst3_s8u10__SVBool_tPa10svint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data) MODE_ATTR @@ -45,20 +51,26 @@ void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_s16u10__SVBool_tPs11svint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) MODE_ATTR @@ -68,20 +80,26 @@ void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_s32u10__SVBool_tPi11svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( 
[[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) MODE_ATTR @@ -91,20 +109,26 @@ void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_s64u10__SVBool_tPl11svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data) MODE_ATTR @@ -114,18 +138,24 @@ void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = 
insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z13test_svst3_u8u10__SVBool_tPh11svuint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data) MODE_ATTR @@ -135,20 +165,26 @@ void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_u16u10__SVBool_tPt12svuint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) 
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) MODE_ATTR @@ -158,20 +194,26 @@ void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_u32u10__SVBool_tPj12svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) MODE_ATTR @@ -181,20 +223,26 @@ void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_u64u10__SVBool_tPm12svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) MODE_ATTR @@ -204,20 +252,26 @@ void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CHECK-NEXT: 
[[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_f16u10__SVBool_tPDh13svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) MODE_ATTR @@ -227,20 +281,26 @@ void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// 
CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_f32u10__SVBool_tPf13svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) MODE_ATTR @@ -250,20 +310,26 @@ void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst3_f64u10__SVBool_tPd13svfloat64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr 
[[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR @@ -273,20 +339,26 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR // CHECK-LABEL: @test_svst3_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst3_vnum_s8u10__SVBool_tPal10svint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data) 
MODE_ATTR @@ -296,22 +368,28 @@ void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data // CHECK-LABEL: @test_svst3_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s16u10__SVBool_tPsl11svint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data) MODE_ATTR @@ -321,22 +399,28 @@ void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t d // CHECK-LABEL: @test_svst3_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail 
call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s32u10__SVBool_tPil11svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data) MODE_ATTR @@ -346,22 +430,28 @@ void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t d // CHECK-LABEL: @test_svst3_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } 
poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_s64u10__SVBool_tPll11svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data) MODE_ATTR @@ -371,20 +461,26 @@ void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t d // CHECK-LABEL: @test_svst3_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst3_vnum_u8u10__SVBool_tPhl11svuint8x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv16i8( [[TMP3]], [[TMP4]], [[TMP5]], [[PG:%.*]], ptr [[TMP6]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data) MODE_ATTR @@ -394,22 +490,28 @@ void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t da // CHECK-LABEL: @test_svst3_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u16u10__SVBool_tPtl12svuint16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8i16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data) MODE_ATTR @@ -419,22 +521,28 @@ void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t // CHECK-LABEL: @test_svst3_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u32u10__SVBool_tPjl12svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } 
[[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4i32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data) MODE_ATTR @@ -444,22 +552,28 @@ void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t // CHECK-LABEL: @test_svst3_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_u64u10__SVBool_tPml12svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2i64( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data) MODE_ATTR @@ -469,22 +583,28 @@ void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t // CHECK-LABEL: @test_svst3_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f16u10__SVBool_tPDhl13svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv8f16( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data) MODE_ATTR @@ 
-494,22 +614,28 @@ void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3 // CHECK-LABEL: @test_svst3_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f32u10__SVBool_tPfl13svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[TMP2]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], ptr [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data) MODE_ATTR @@ -519,22 +645,28 @@ void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3 // CHECK-LABEL: @test_svst3_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
<vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[DATA_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP6]], ptr [[TMP7]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst3_vnum_f64u10__SVBool_tPdl13svfloat64x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv6f64(<vscale x 6 x double> [[DATA]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], 2
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x i1> [[TMP6]], ptr [[TMP7]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data) MODE_ATTR
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
index dd032ac0a2bf2f..1f4c4fde8ad1b7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
@@ -25,22 +25,30 @@
// CHECK-LABEL: @test_svst4_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[DATA_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], <vscale x 8 x i1> [[TMP8]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst4_bf16u10__SVBool_tPu6__bf1614svbfloat16x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[DATA_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], <vscale x 8 x i1> [[TMP8]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) MODE_ATTR
@@ -50,24 +58,32 @@ void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) MODE_AT
// CHECK-LABEL: @test_svst4_vnum_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[DATA_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], <vscale x 8 x i1> [[TMP8]], ptr [[TMP9]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst4_vnum_bf16u10__SVBool_tPu6__bf16l14svbfloat16x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[DATA]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[DATA_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 8 x bfloat>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], <vscale x 8 x i1> [[TMP8]], ptr [[TMP9]])
// CPP-CHECK-NEXT: ret void
//
void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data) MODE_ATTR
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
index 2ed52dff687296..160a21d93e4160 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
@@ -24,20 +24,28 @@
// CHECK-LABEL: @test_svst4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DATA_COERCE0:%.*]], 0
+// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[DATA_COERCE1:%.*]], 1
+// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[DATA_COERCE2:%.*]], 2
+// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[DATA_COERCE3:%.*]], 3
+// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z13test_svst4_s8u10__SVBool_tPa10svint8x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[DATA]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], <vscale x 16 x i8> [[DATA_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 3
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data) MODE_ATTR
@@ -47,22 +55,30 @@ void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data) MODE_ATTR
// CHECK-LABEL: @test_svst4_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[DATA]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> 
[[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_s16u10__SVBool_tPs11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) MODE_ATTR @@ -72,22 +88,30 @@ void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], 
[[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_s32u10__SVBool_tPi11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data) MODE_ATTR @@ -97,22 +121,30 @@ void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_s64u10__SVBool_tPl11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data) MODE_ATTR @@ -122,20 +154,28 @@ void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], 
[[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z13test_svst4_u8u10__SVBool_tPh11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data) MODE_ATTR @@ -145,22 +185,30 @@ void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_u16u10__SVBool_tPt12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data) MODE_ATTR @@ -170,22 +218,30 @@ void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_u32u10__SVBool_tPj12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) MODE_ATTR @@ -195,22 +251,30 @@ void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_u64u10__SVBool_tPm12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) MODE_ATTR @@ -220,22 +284,30 @@ void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_f16u10__SVBool_tPDh13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) MODE_ATTR @@ -245,22 +317,30 @@ void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_f32u10__SVBool_tPf13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) MODE_ATTR @@ -270,22 +350,30 @@ void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4_f64u10__SVBool_tPd13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR @@ -295,22 +383,30 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR // CHECK-LABEL: @test_svst4_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst4_vnum_s8u10__SVBool_tPal10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr 
[[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data) MODE_ATTR @@ -320,24 +416,32 @@ void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data // CHECK-LABEL: @test_svst4_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s16u10__SVBool_tPsl11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } 
[[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data) MODE_ATTR @@ -347,24 +451,32 @@ void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t d // CHECK-LABEL: @test_svst4_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s32u10__SVBool_tPil11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data) MODE_ATTR @@ -374,24 +486,32 @@ void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t d // CHECK-LABEL: @test_svst4_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_s64u10__SVBool_tPll11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data) MODE_ATTR @@ -401,22 +521,30 @@ void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t d // CHECK-LABEL: @test_svst4_vnum_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z18test_svst4_vnum_u8u10__SVBool_tPhl11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[DATA]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } 
[[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[PG:%.*]], ptr [[TMP8]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data) MODE_ATTR @@ -426,24 +554,32 @@ void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t da // CHECK-LABEL: @test_svst4_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u16u10__SVBool_tPtl12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], 
[[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8i16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data) MODE_ATTR @@ -453,24 +589,32 @@ void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t // CHECK-LABEL: @test_svst4_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u32u10__SVBool_tPjl12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], 
[[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4i32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data) MODE_ATTR @@ -480,24 +624,32 @@ void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t // CHECK-LABEL: @test_svst4_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_u64u10__SVBool_tPml12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[DATA]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr 
[[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2i64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data) MODE_ATTR @@ -507,24 +659,32 @@ void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t // CHECK-LABEL: @test_svst4_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f16u10__SVBool_tPDhl13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[DATA]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv8f16( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data) MODE_ATTR @@ -534,24 +694,32 @@ void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4 // CHECK-LABEL: @test_svst4_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f32u10__SVBool_tPfl13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[DATA]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv4f32( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CPP-CHECK-NEXT: ret void // void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data) MODE_ATTR @@ -561,24 +729,32 @@ void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4 // CHECK-LABEL: @test_svst4_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP5]]) +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[DATA_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[DATA_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[DATA_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[DATA_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]], [[TMP8]], ptr [[TMP9]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4_vnum_f64u10__SVBool_tPdl13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
<vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[DATA]], i64 6)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[DATA_COERCE0:%.*]], 0
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[DATA_COERCE1:%.*]], 1
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], <vscale x 2 x double> [[DATA_COERCE2:%.*]], 2
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], <vscale x 2 x double> [[DATA_COERCE3:%.*]], 3
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 3
+// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP9:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x i1> [[TMP8]], ptr [[TMP9]])
// CPP-CHECK-NEXT: ret void
//
void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data) MODE_ATTR
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
index dcaded8967fd0a..e7325a25cd33be 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef2_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x bfloat> undef
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
// CPP-CHECK-LABEL: @_Z18test_svundef2_bf16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
svbfloat16x2_t test_svundef2_bf16(void) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
index 677e338879c000..f67de2e0e6e3fb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef2_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 32 x i8> undef
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
// CPP-CHECK-LABEL: @_Z16test_svundef2_s8v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
svint8x2_t test_svundef2_s8(void) MODE_ATTR
{
@@ -29,11 +29,11 @@ svint8x2_t test_svundef2_s8(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i16> undef
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_s16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
svint16x2_t test_svundef2_s16(void) MODE_ATTR
{
@@ -42,11 +42,11 @@ svint16x2_t test_svundef2_s16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 8 x i32> undef
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_s32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
svint32x2_t test_svundef2_s32(void) MODE_ATTR
{
@@ -55,11 +55,11 @@ svint32x2_t test_svundef2_s32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 4 x i64> undef
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_s64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 4 x i64> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
svint64x2_t test_svundef2_s64(void) MODE_ATTR
{
@@ -68,11 +68,11 @@ svint64x2_t test_svundef2_s64(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 32 x i8> undef
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
// CPP-CHECK-LABEL: @_Z16test_svundef2_u8v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
svuint8x2_t test_svundef2_u8(void) MODE_ATTR
{
@@ -81,11 +81,11 @@ svuint8x2_t test_svundef2_u8(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i16> undef
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_u16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
svuint16x2_t test_svundef2_u16(void) MODE_ATTR
{
@@ -94,11 +94,11 @@ svuint16x2_t test_svundef2_u16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 8 x i32> undef
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_u32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
svuint32x2_t test_svundef2_u32(void) MODE_ATTR
{
@@ -107,11 +107,11 @@ svuint32x2_t test_svundef2_u32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 4 x i64> undef
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_u64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 4 x i64> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
svuint64x2_t test_svundef2_u64(void) MODE_ATTR
{
@@ -120,11 +120,11 @@ svuint64x2_t test_svundef2_u64(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x half> undef
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_f16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } undef
//
svfloat16x2_t test_svundef2_f16(void) MODE_ATTR
{
@@ -133,11 +133,11 @@ svfloat16x2_t test_svundef2_f16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 8 x float> undef
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_f32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } undef
//
svfloat32x2_t test_svundef2_f32(void) MODE_ATTR
{
@@ -146,11 +146,11 @@ svfloat32x2_t test_svundef2_f32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef2_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 4 x double> undef
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef2_f64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } undef
//
svfloat64x2_t test_svundef2_f64(void) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
index 223340095addd0..7a35431daead18 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef3_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 24 x bfloat> undef
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
// CPP-CHECK-LABEL: @_Z18test_svundef3_bf16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 24 x bfloat> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
svbfloat16x3_t test_svundef3_bf16(void) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
index 7104f21b759147..c6807fba84e891 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef3_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 48 x i8> undef
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
// CPP-CHECK-LABEL: @_Z16test_svundef3_s8v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
svint8x3_t test_svundef3_s8(void) MODE_ATTR
{
@@ -29,11 +29,11 @@ svint8x3_t test_svundef3_s8(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 24 x i16> undef
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_s16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
svint16x3_t test_svundef3_s16(void) MODE_ATTR
{
@@ -42,11 +42,11 @@ svint16x3_t test_svundef3_s16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 12 x i32> undef
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_s32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
svint32x3_t test_svundef3_s32(void) MODE_ATTR
{
@@ -55,11 +55,11 @@ svint32x3_t test_svundef3_s32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 6 x i64> undef
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_s64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
svint64x3_t test_svundef3_s64(void) MODE_ATTR
{
@@ -68,11 +68,11 @@ svint64x3_t test_svundef3_s64(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 48 x i8> undef
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
// CPP-CHECK-LABEL: @_Z16test_svundef3_u8v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
svuint8x3_t test_svundef3_u8(void) MODE_ATTR
{
@@ -81,11 +81,11 @@ svuint8x3_t test_svundef3_u8(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 24 x i16> undef
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_u16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
svuint16x3_t test_svundef3_u16(void) MODE_ATTR
{
@@ -94,11 +94,11 @@ svuint16x3_t test_svundef3_u16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 12 x i32> undef
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_u32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
svuint32x3_t test_svundef3_u32(void) MODE_ATTR
{
@@ -107,11 +107,11 @@ svuint32x3_t test_svundef3_u32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 6 x i64> undef
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_u64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
svuint64x3_t test_svundef3_u64(void) MODE_ATTR
{
@@ -120,11 +120,11 @@ svuint64x3_t test_svundef3_u64(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 24 x half> undef
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_f16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 24 x half> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } undef
//
svfloat16x3_t test_svundef3_f16(void) MODE_ATTR
{
@@ -133,11 +133,11 @@ svfloat16x3_t test_svundef3_f16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 12 x float> undef
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_f32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 12 x float> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } undef
//
svfloat32x3_t test_svundef3_f32(void) MODE_ATTR
{
@@ -146,11 +146,11 @@ svfloat32x3_t test_svundef3_f32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef3_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 6 x double> undef
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef3_f64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 6 x double> undef
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } undef
//
svfloat64x3_t test_svundef3_f64(void) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
index d58e47d5107529..431b82bb5f1063 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef4_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 32 x bfloat> undef
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
// CPP-CHECK-LABEL: @_Z18test_svundef4_bf16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } undef
//
svbfloat16x4_t test_svundef4_bf16(void) MODE_ATTR
{
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
index fd736ac0615aa8..373470b6a4b6be 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
@@ -16,11 +16,11 @@
// CHECK-LABEL: @test_svundef4_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i8> undef
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
// CPP-CHECK-LABEL: @_Z16test_svundef4_s8v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } undef
//
svint8x4_t test_svundef4_s8(void) MODE_ATTR
{
@@ -29,11 +29,11 @@ svint8x4_t test_svundef4_s8(void) MODE_ATTR
// CHECK-LABEL: @test_svundef4_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 32 x i16> undef
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef4_s16v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 32 x i16> undef
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } undef
//
svint16x4_t test_svundef4_s16(void) MODE_ATTR
{
@@ -42,11 +42,11 @@ svint16x4_t test_svundef4_s16(void) MODE_ATTR
// CHECK-LABEL: @test_svundef4_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 16 x i32> undef
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef4_s32v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 16 x i32> undef
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } undef
//
svint32x4_t test_svundef4_s32(void) MODE_ATTR
{
@@ -55,11 +55,11 @@ svint32x4_t test_svundef4_s32(void) MODE_ATTR
// CHECK-LABEL: @test_svundef4_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 8 x i64> undef
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } undef
//
// CPP-CHECK-LABEL: @_Z17test_svundef4_s64v(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 8 x i64> undef
undef +// CPP-CHECK-NEXT: ret { , , , } undef // svint64x4_t test_svundef4_s64(void) MODE_ATTR { @@ -68,11 +68,11 @@ svint64x4_t test_svundef4_s64(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z16test_svundef4_u8v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svuint8x4_t test_svundef4_u8(void) MODE_ATTR { @@ -81,11 +81,11 @@ svuint8x4_t test_svundef4_u8(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_u16v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svuint16x4_t test_svundef4_u16(void) MODE_ATTR { @@ -94,11 +94,11 @@ svuint16x4_t test_svundef4_u16(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_u32v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svuint32x4_t test_svundef4_u32(void) MODE_ATTR { @@ -107,11 +107,11 @@ svuint32x4_t test_svundef4_u32(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_u64v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svuint64x4_t test_svundef4_u64(void) MODE_ATTR { @@ -120,11 +120,11 @@ svuint64x4_t test_svundef4_u64(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_f16v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svfloat16x4_t test_svundef4_f16(void) MODE_ATTR { @@ -133,11 +133,11 @@ svfloat16x4_t test_svundef4_f16(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_f32v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svfloat32x4_t test_svundef4_f32(void) MODE_ATTR { @@ -146,11 +146,11 @@ svfloat32x4_t test_svundef4_f32(void) MODE_ATTR // CHECK-LABEL: @test_svundef4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: ret undef +// CHECK-NEXT: ret { , , , } undef // // CPP-CHECK-LABEL: @_Z17test_svundef4_f64v( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: ret undef +// CPP-CHECK-NEXT: ret { , , , } undef // svfloat64x4_t test_svundef4_f64(void) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c index 60c4828c407e8e..4b3f97d13c7eb1 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_luti.c @@ -248,17 +248,13 @@ svbfloat16_t test_svluti4_lane_bf16(svbfloat16_t table, svuint8_t indices) MODE_ // SME-CHECK-NEXT: ret [[TMP2]] // CHECK-LABEL: @test_svluti4_lane_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 0) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 0) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_s16_x211svint16x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 0) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svluti4_lane_s16_x2(svint16x2_t table, svuint8_t indices) MODE_ATTR{ return SVE_ACLE_FUNC(svluti4_lane,_s16,_x2)(table, indices, 0); @@ -272,17 +268,13 @@ svint16_t test_svluti4_lane_s16_x2(svint16x2_t table, svuint8_t indices) MODE_AT // SME-CHECK-NEXT: ret [[TMP2]] // CHECK-LABEL: @test_svluti4_lane_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 3) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 3) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_u16_x212svuint16x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[TABLE]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 3) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8i16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svluti4_lane_u16_x2(svuint16x2_t table, svuint8_t indices) MODE_ATTR{ return SVE_ACLE_FUNC(svluti4_lane,_u16,_x2)(table, indices, 3); @@ -296,17 +288,13 @@ svuint16_t test_svluti4_lane_u16_x2(svuint16x2_t table, svuint8_t indices) MODE_ // SME-CHECK-NEXT: ret [[TMP2]] // CHECK-LABEL: @test_svluti4_lane_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TABLE:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TABLE]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8f16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 2) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8f16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 2) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svluti4_lane_f16_x213svfloat16x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.vector.extract.nxv8f16.nxv16f16( [[TABLE:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[TABLE]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8f16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 2) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8f16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 2) +// CPP-CHECK-NEXT: ret [[TMP0]] // svfloat16_t test_svluti4_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) MODE_ATTR{ return SVE_ACLE_FUNC(svluti4_lane,_f16,_x2)(table, indices, 2); @@ -320,17 +308,13 @@ svfloat16_t test_svluti4_lane_f16_x2(svfloat16x2_t table, svuint8_t indices) MOD // SME-CHECK-NEXT: ret [[TMP2]] // CHECK-LABEL: @test_svluti4_lane_bf16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TABLE:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TABLE]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 1) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 1) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svluti4_lane_bf16_x214svbfloat16x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TABLE:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[TABLE]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16( [[TMP0]], [[TMP1]], [[INDICES:%.*]], i32 1) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16( [[TABLE_COERCE0:%.*]], [[TABLE_COERCE1:%.*]], [[INDICES:%.*]], i32 1) +// CPP-CHECK-NEXT: ret [[TMP0]] // svbfloat16_t test_svluti4_lane_bf16_x2(svbfloat16x2_t table, svuint8_t indices) MODE_ATTR{ return SVE_ACLE_FUNC(svluti4_lane,_bf16,_x2)(table, indices, 1); diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c index ac363f224c3033..96af8c0bfa97d6 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c @@ -16,17 +16,13 @@ // CHECK-LABEL: @test_svtbl2_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svtbl2_bf1614svbfloat16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.aarch64.sve.tbl2.nxv8bf16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8bf16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) { return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices); diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c index 75109831a4eef0..999a87fcfbbc8e 100644 --- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2.c @@ -17,17 +17,13 @@ // CHECK-LABEL: @test_svtbl2_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svtbl2_s810svint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint8_t test_svtbl2_s8(svint8x2_t data, svuint8_t indices) { @@ -36,17 +32,13 @@ svint8_t test_svtbl2_s8(svint8x2_t data, svuint8_t indices) // CHECK-LABEL: @test_svtbl2_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_s1611svint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svtbl2_s16(svint16x2_t data, svuint16_t indices) { @@ -55,17 +47,13 @@ svint16_t test_svtbl2_s16(svint16x2_t data, svuint16_t indices) // CHECK-LABEL: @test_svtbl2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// 
CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_s3211svint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint32_t test_svtbl2_s32(svint32x2_t data, svuint32_t indices) { @@ -74,17 +62,13 @@ svint32_t test_svtbl2_s32(svint32x2_t data, svuint32_t indices) // CHECK-LABEL: @test_svtbl2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_s6411svint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint64_t test_svtbl2_s64(svint64x2_t data, svuint64_t indices) { @@ -93,17 +77,13 @@ svint64_t test_svtbl2_s64(svint64x2_t data, svuint64_t indices) // CHECK-LABEL: @test_svtbl2_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z14test_svtbl2_u811svuint8x2_tu11__SVUint8_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[DATA]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv16i8( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.tbl2.nxv16i8( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint8_t test_svtbl2_u8(svuint8x2_t data, svuint8_t indices) { @@ -112,17 +92,13 @@ svuint8_t test_svtbl2_u8(svuint8x2_t data, svuint8_t indices) // CHECK-LABEL: @test_svtbl2_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_u1612svuint16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8i16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svtbl2_u16(svuint16x2_t data, svuint16_t indices) { @@ -131,17 +107,13 @@ svuint16_t test_svtbl2_u16(svuint16x2_t data, svuint16_t indices) // CHECK-LABEL: @test_svtbl2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_u3212svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4i32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint32_t test_svtbl2_u32(svuint32x2_t data, svuint32_t indices) { @@ -150,17 +122,13 @@ svuint32_t test_svtbl2_u32(svuint32x2_t data, svuint32_t indices) // CHECK-LABEL: @test_svtbl2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z15test_svtbl2_u6412svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2i64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint64_t test_svtbl2_u64(svuint64x2_t data, svuint64_t indices) { @@ -169,17 +137,13 @@ svuint64_t test_svtbl2_u64(svuint64x2_t data, svuint64_t indices) // CHECK-LABEL: @test_svtbl2_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8f16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8f16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_f1613svfloat16x2_tu12__SVUint16_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[DATA]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8f16( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv8f16( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svfloat16_t test_svtbl2_f16(svfloat16x2_t data, svuint16_t indices) { @@ -188,17 +152,13 @@ svfloat16_t test_svtbl2_f16(svfloat16x2_t data, svuint16_t indices) // CHECK-LABEL: @test_svtbl2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4f32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4f32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_f3213svfloat32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[DATA]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4f32( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv4f32( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svfloat32_t test_svtbl2_f32(svfloat32x2_t data, svuint32_t indices) { @@ -207,17 +167,13 @@ svfloat32_t test_svtbl2_f32(svfloat32x2_t data, svuint32_t indices) // CHECK-LABEL: @test_svtbl2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2f64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2f64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z15test_svtbl2_f6413svfloat64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[DATA]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2f64( [[TMP0]], [[TMP1]], [[INDICES:%.*]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.tbl2.nxv2f64( [[DATA_COERCE0:%.*]], [[DATA_COERCE1:%.*]], [[INDICES:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svfloat64_t test_svtbl2_f64(svfloat64x2_t data, svuint64_t indices) { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c index feed95d76c789d..5ac496427ad505 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c @@ -33,15 +33,15 @@ // CHECK-LABEL: @test_svcreate2_b( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svcreate2_bu10__SVBool_tS_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP1]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svboolx2_t test_svcreate2_b(svbool_t x0, svbool_t x1) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c index 44937d8fb8399c..ce14ca859667c9 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create4_bool.c @@ -33,19 +33,19 @@ // CHECK-LABEL: @test_svcreate4_b( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( poison, [[X0:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP0]], [[X1:%.*]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP1]], [[X2:%.*]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP2]], [[X4:%.*]], i64 48) -// CHECK-NEXT: ret [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , 
, } poison, [[X0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z16test_svcreate4_bu10__SVBool_tS_S_S_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( poison, [[X0:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP0]], [[X1:%.*]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP1]], [[X2:%.*]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TMP2]], [[X4:%.*]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP3]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[X0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[X1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[X2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[X4:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svboolx4_t test_svcreate4_b(svbool_t x0, svbool_t x1, svbool_t x2, svbool_t x4) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c index 1fc98a48d9d5cf..9907783331c300 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get2_bool.c @@ -31,13 +31,17 @@ // CHECK-LABEL: @test_svget2_b_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_b_010svboolx2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: ret [[TMP2]] // svbool_t test_svget2_b_0(svboolx2_t tuple) ATTR { @@ -46,13 +50,17 @@ svbool_t test_svget2_b_0(svboolx2_t tuple) ATTR // CHECK-LABEL: @test_svget2_b_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: ret [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svget2_b_110svboolx2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv32i1( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } 
[[TMP0]], <vscale x 16 x i1> [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], 1 +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]] // svbool_t test_svget2_b_1(svboolx2_t tuple) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c index 555725c8856757..bfe63bdfb9647a 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c @@ -35,13 +35,21 @@ // several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svget4_b_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[TUPLE:%.*]], i64 0) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], 0 +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svget4_b_010svboolx4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[TUPLE:%.*]], i64 0) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], 0 +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]] // svbool_t test_svget4_b_0(svboolx4_t tuple) MODE_ATTR { @@ -52,13 +60,21 @@ svbool_t test_svget4_b_0(svboolx4_t tuple) MODE_ATTR // several parameters, one for each member of the original struct.
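For orientation amid these mechanical CHECK updates: the test comment above ("several parameters, one for each member of the original struct") describes the new ABI-level shape, and the sketch below shows, as reduced standalone LLVM IR, the pattern the updated checks expect when reading one member out of an svboolx4_t. The function and value names are hypothetical and not taken from the test files; only the insertvalue/extractvalue shape mirrors the expected output.

define <vscale x 16 x i1> @sketch_get4_b_1(<vscale x 16 x i1> %c0, <vscale x 16 x i1> %c1, <vscale x 16 x i1> %c2, <vscale x 16 x i1> %c3) {
entry:
  ; Hypothetical reduced example: rebuild the four-vector predicate tuple
  ; from its coerced scalable-vector arguments...
  %t0 = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> %c0, 0
  %t1 = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } %t0, <vscale x 16 x i1> %c1, 1
  %t2 = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } %t1, <vscale x 16 x i1> %c2, 2
  %t3 = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } %t2, <vscale x 16 x i1> %c3, 3
  ; ...then svget4(tuple, 1) becomes a plain struct member read, replacing
  ; the old llvm.vector.extract at a scaled element offset.
  %v = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } %t3, 1
  ret <vscale x 16 x i1> %v
}

Design-wise this removes all element-offset arithmetic from tuple get/set: members are accessed by index, so the same lowering works uniformly for predicate tuples (where the old concatenated form needed an nxv64i1 container) and for data-vector tuples.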
// CHECK-LABEL: @test_svget4_b_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svget4_b_110svboolx4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 1 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbool_t test_svget4_b_1(svboolx4_t tuple) MODE_ATTR { @@ -69,13 +85,21 @@ svbool_t test_svget4_b_1(svboolx4_t tuple) MODE_ATTR // several parameters, one for each member of the original struct. // CHECK-LABEL: @test_svget4_b_3( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 48) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CHECK-NEXT: ret [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svget4_b_310svboolx4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i1.nxv64i1( [[TUPLE:%.*]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 3 +// CPP-CHECK-NEXT: ret [[TMP4]] // svbool_t test_svget4_b_3(svboolx4_t tuple) MODE_ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c index 9030ee44db882a..93cb653032df77 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -27,20 +27,12 @@ // CHECK-LABEL: @test_svld1_u8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svld1_u8_x2u11__SVCount_tPKh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) ATTR { @@ -50,20 +42,12 @@ svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) ATTR // CHECK-LABEL: @test_svld1_u16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_u16_x2u11__SVCount_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) ATTR { @@ -73,20 +57,12 @@ svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) ATTR // CHECK-LABEL: @test_svld1_u32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_u32_x2u11__SVCount_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint32x2_t test_svld1_u32_x2(svcount_t pn, const uint32_t *base) ATTR { @@ -96,20 +72,12 @@ svuint32x2_t test_svld1_u32_x2(svcount_t pn, const uint32_t *base) ATTR // CHECK-LABEL: @test_svld1_u64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_u64_x2u11__SVCount_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) ATTR { @@ -119,28 +87,12 @@ svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) ATTR // CHECK-LABEL: @test_svld1_u8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svld1_u8_x4u11__SVCount_tPKh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: 
[[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) ATTR { @@ -150,28 +102,12 @@ svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) ATTR // CHECK-LABEL: @test_svld1_u16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_u16_x4u11__SVCount_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) ATTR { @@ -181,28 +117,12 @@ svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) ATTR // CHECK-LABEL: @test_svld1_u32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z17test_svld1_u32_x4u11__SVCount_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) ATTR { @@ -212,28 +132,12 @@ svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) ATTR // CHECK-LABEL: @test_svld1_u64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_u64_x4u11__SVCount_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) ATTR { @@ -243,20 +147,12 @@ svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) ATTR // CHECK-LABEL: @test_svld1_s8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svld1_s8_x2u11__SVCount_tPKa( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) ATTR { @@ -266,20 +162,12 @@ svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) ATTR // CHECK-LABEL: @test_svld1_s16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s16_x2u11__SVCount_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) ATTR { @@ -289,20 +177,12 @@ svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) ATTR // CHECK-LABEL: @test_svld1_s32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s32_x2u11__SVCount_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) ATTR { @@ -312,20 +192,12 @@ svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) ATTR // CHECK-LABEL: @test_svld1_s64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s64_x2u11__SVCount_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base) ATTR { @@ -335,28 +207,12 @@ svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base) ATTR // CHECK-LABEL: @test_svld1_s8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svld1_s8_x4u11__SVCount_tPKa( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) ATTR { @@ -366,28 +222,12 @@ svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) ATTR // CHECK-LABEL: @test_svld1_s16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s16_x4u11__SVCount_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) ATTR { @@ -397,28 +237,12 @@ svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) ATTR // CHECK-LABEL: @test_svld1_s32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] 
+// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s32_x4u11__SVCount_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) ATTR { @@ -428,28 +252,12 @@ svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) ATTR // CHECK-LABEL: @test_svld1_s64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_s64_x4u11__SVCount_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) ATTR { @@ -459,20 +267,12 @@ svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) ATTR // CHECK-LABEL: @test_svld1_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") 
[[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f16_x2u11__SVCount_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base) ATTR { @@ -482,20 +282,12 @@ svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base) ATTR // CHECK-LABEL: @test_svld1_f32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f32_x2u11__SVCount_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) ATTR { @@ -505,20 +297,12 @@ svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) ATTR // CHECK-LABEL: @test_svld1_f64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f64_x2u11__SVCount_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } 
[[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR { @@ -528,28 +312,12 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR // CHECK-LABEL: @test_svld1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f16_x4u11__SVCount_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) ATTR { @@ -559,28 +327,12 @@ svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) ATTR // CHECK-LABEL: @test_svld1_f32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f32_x4u11__SVCount_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) ATTR { @@ -590,28 +342,12 @@ svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) ATTR // CHECK-LABEL: @test_svld1_f64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svld1_f64_x4u11__SVCount_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR { @@ -629,11 +365,7 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x2u11__SVCount_tPKhl( // CPP-CHECK-NEXT: entry: @@ -642,11 +374,7 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { @@ -660,11 +388,7 @@ svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x2u11__SVCount_tPKtl( // CPP-CHECK-NEXT: entry: @@ -673,11 +397,7 @@ svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { @@ -691,11 +411,7 @@ svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: 
[[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x2u11__SVCount_tPKjl( // CPP-CHECK-NEXT: entry: @@ -704,11 +420,7 @@ svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { @@ -722,11 +434,7 @@ svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x2u11__SVCount_tPKml( // CPP-CHECK-NEXT: entry: @@ -735,11 +443,7 @@ svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { @@ -753,15 +457,7 @@ svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x4u11__SVCount_tPKhl( // CPP-CHECK-NEXT: entry: @@ -770,15 +466,7 @@ svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { @@ -792,15 +480,7 @@ svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x4u11__SVCount_tPKtl( // CPP-CHECK-NEXT: 
entry: @@ -809,15 +489,7 @@ svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { @@ -831,15 +503,7 @@ svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x4u11__SVCount_tPKjl( // CPP-CHECK-NEXT: entry: @@ -848,15 +512,7 @@ svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: 
[[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { @@ -870,15 +526,7 @@ svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x4u11__SVCount_tPKml( // CPP-CHECK-NEXT: entry: @@ -887,15 +535,7 @@ svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { @@ -909,11 +549,7 @@ svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // 
CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x2u11__SVCount_tPKal( // CPP-CHECK-NEXT: entry: @@ -922,11 +558,7 @@ svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { @@ -940,11 +572,7 @@ svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x2u11__SVCount_tPKsl( // CPP-CHECK-NEXT: entry: @@ -953,11 +581,7 @@ svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { @@ -971,11 +595,7 @@ svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // 
CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x2u11__SVCount_tPKil( // CPP-CHECK-NEXT: entry: @@ -984,11 +604,7 @@ svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { @@ -1002,11 +618,7 @@ svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x2u11__SVCount_tPKll( // CPP-CHECK-NEXT: entry: @@ -1015,11 +627,7 @@ svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { @@ -1033,15 +641,7 @@ svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 
-// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x4u11__SVCount_tPKal( // CPP-CHECK-NEXT: entry: @@ -1050,15 +650,7 @@ svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { @@ -1072,15 +664,7 @@ svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x4u11__SVCount_tPKsl( // CPP-CHECK-NEXT: entry: @@ -1089,15 +673,7 @@ svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { @@ -1111,15 +687,7 @@ svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x4u11__SVCount_tPKil( // CPP-CHECK-NEXT: entry: @@ -1128,15 +696,7 @@ svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { @@ -1150,15 +710,7 @@ svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x4u11__SVCount_tPKll( // CPP-CHECK-NEXT: entry: @@ -1167,15 +719,7 @@ svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { @@ -1189,11 +733,7 @@ svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vn // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x2u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: @@ -1202,11 +742,7 @@ svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vn // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { @@ -1220,11 +756,7 @@ svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x2u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: @@ -1233,11 +765,7 @@ svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { @@ -1251,11 +779,7 @@ svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x2u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: @@ -1264,11 +788,7 @@ svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { @@ -1282,15 +802,7 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x4u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: @@ -1299,15 +811,7 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { @@ -1321,15 +825,7 @@ svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: 
[[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x4u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: @@ -1338,15 +834,7 @@ svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { @@ -1360,15 +848,7 @@ svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x4u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: @@ -1377,15 +857,7 @@ svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 0
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 1
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 2)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 2
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 4)
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]], 3
-// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP9]], <vscale x 2 x double> [[TMP10]], i64 6)
-// CPP-CHECK-NEXT: ret <vscale x 8 x double> [[TMP11]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP3]]
//
svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) ATTR {
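Every hunk in these test updates follows the same before/after shape, so one illustrative sketch may help when skimming: the old CHECK lines expected the two (or four) parts of the intrinsic's result to be re-packed into a single wide concatenated vector via @llvm.vector.insert, while the updated lines expect the result struct to be returned directly. A minimal hand-written example for an x2 load of <vscale x 4 x i32> (value names like %ld and %cat0 are illustrative, not taken from any one test):

  ; Old codegen: concatenate both parts into <vscale x 8 x i32>
  %ld = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") %pn, ptr %base)
  %lo = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ld, 0
  %cat0 = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> %lo, i64 0)
  %hi = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %ld, 1
  %cat1 = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %cat0, <vscale x 4 x i32> %hi, i64 4)
  ret <vscale x 8 x i32> %cat1

  ; New codegen: the tuple is now a struct of vectors, so the
  ; intrinsic's result is returned as-is
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %ld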
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
index 5ce7c3b8fcab71..8254c6aec5dc1c 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
@@ -25,20 +25,12 @@
// CHECK-LABEL: @test_svldnt1_u8_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x2u11__SVCount_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) ATTR {
@@ -48,20 +40,12 @@ svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) ATTR
// CHECK-LABEL: @test_svldnt1_u16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x2u11__SVCount_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]]
//
svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) ATTR {
@@ -71,20 +55,12 @@ svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) ATTR
// CHECK-LABEL: @test_svldnt1_u32_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x2u11__SVCount_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]]
//
svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base) ATTR {
@@ -94,20 +70,12 @@ svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base) ATTR
// CHECK-LABEL: @test_svldnt1_u64_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x2u11__SVCount_tPKm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]]
//
svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) ATTR {
@@ -117,28 +85,12 @@ svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) ATTR
// CHECK-LABEL: @test_svldnt1_u8_x4(
// CHECK-NEXT: entry:
// CHECK-NEXT:
[[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x4u11__SVCount_tPKh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) ATTR { @@ -148,28 +100,12 @@ svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) ATTR // CHECK-LABEL: @test_svldnt1_u16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x4u11__SVCount_tPKt( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) ATTR { @@ -179,28 +115,12 @@ svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) ATTR // CHECK-LABEL: @test_svldnt1_u32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x4u11__SVCount_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) ATTR { @@ -210,28 +130,12 @@ svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) ATTR // CHECK-LABEL: @test_svldnt1_u64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x4u11__SVCount_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) ATTR { @@ -241,20 +145,12 @@ svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s8_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x2u11__SVCount_tPKa( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) ATTR { @@ -264,20 +160,12 @@ svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: 
@_Z19test_svldnt1_s16_x2u11__SVCount_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) ATTR { @@ -287,20 +175,12 @@ svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x2u11__SVCount_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) ATTR { @@ -310,20 +190,12 @@ svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x2u11__SVCount_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base) 
ATTR { @@ -333,28 +205,12 @@ svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s8_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x4u11__SVCount_tPKa( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) ATTR { @@ -364,28 +220,12 @@ svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_s16_x4u11__SVCount_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) ATTR { @@ -395,28 +235,12 @@ svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x4u11__SVCount_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) ATTR { @@ -426,28 +250,12 @@ svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) ATTR // CHECK-LABEL: @test_svldnt1_s64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x4u11__SVCount_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) ATTR { @@ -457,20 +265,12 @@ svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x2u11__SVCount_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base) ATTR { @@ -480,20 +280,12 @@ svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x2u11__SVCount_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) ATTR { @@ -503,20 +295,12 @@ svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: ret { , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x2u11__SVCount_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: ret { , } [[TMP0]] // svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR { @@ -526,28 +310,12 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x4u11__SVCount_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) ATTR { @@ -557,28 +325,12 @@ svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x4u11__SVCount_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) ATTR { @@ -588,28 +340,12 @@ svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) ATTR // CHECK-LABEL: @test_svldnt1_f64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , , , } [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x4u11__SVCount_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP0]] // svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR { @@ -627,11 +363,7 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x2u11__SVCount_tPKhl( // CPP-CHECK-NEXT: entry: @@ -640,11 +372,7 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { @@ -658,11 +386,7 @@ svuint8x2_t 
test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t v // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x2u11__SVCount_tPKtl( // CPP-CHECK-NEXT: entry: @@ -671,11 +395,7 @@ svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t v // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { @@ -689,11 +409,7 @@ svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x2u11__SVCount_tPKjl( // CPP-CHECK-NEXT: entry: @@ -702,11 +418,7 @@ svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { @@ -720,11 +432,7 @@ 
svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x2u11__SVCount_tPKml( // CPP-CHECK-NEXT: entry: @@ -733,11 +441,7 @@ svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { @@ -751,15 +455,7 @@ svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x4u11__SVCount_tPKhl( // CPP-CHECK-NEXT: entry: @@ -768,15 +464,7 @@ svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, 
[[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) ATTR { @@ -790,15 +478,7 @@ svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t v // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x4u11__SVCount_tPKtl( // CPP-CHECK-NEXT: entry: @@ -807,15 +487,7 @@ svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t v // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) ATTR { @@ -829,15 +501,7 @@ svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , 
} @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x4u11__SVCount_tPKjl( // CPP-CHECK-NEXT: entry: @@ -846,15 +510,7 @@ svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) ATTR { @@ -868,15 +524,7 @@ svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x4u11__SVCount_tPKml( // CPP-CHECK-NEXT: entry: @@ -885,15 +533,7 @@ svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const 
uint32_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) ATTR { @@ -907,11 +547,7 @@ svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x2u11__SVCount_tPKal( // CPP-CHECK-NEXT: entry: @@ -920,11 +556,7 @@ svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { @@ -938,11 +570,7 @@ svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: 
[[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x2u11__SVCount_tPKsl( // CPP-CHECK-NEXT: entry: @@ -951,11 +579,7 @@ svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { @@ -969,11 +593,7 @@ svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x2u11__SVCount_tPKil( // CPP-CHECK-NEXT: entry: @@ -982,11 +602,7 @@ svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { @@ -1000,11 +616,7 @@ svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// 
CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x2u11__SVCount_tPKll( // CPP-CHECK-NEXT: entry: @@ -1013,11 +625,7 @@ svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { @@ -1031,15 +639,7 @@ svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x4u11__SVCount_tPKal( // CPP-CHECK-NEXT: entry: @@ -1048,15 +648,7 @@ svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( 
[[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) ATTR { @@ -1070,15 +662,7 @@ svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x4u11__SVCount_tPKsl( // CPP-CHECK-NEXT: entry: @@ -1087,15 +671,7 @@ svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) ATTR { @@ -1109,15 +685,7 @@ svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = 
tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x4u11__SVCount_tPKil( // CPP-CHECK-NEXT: entry: @@ -1126,15 +694,7 @@ svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) ATTR { @@ -1148,15 +708,7 @@ svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x4u11__SVCount_tPKll( // CPP-CHECK-NEXT: entry: @@ -1165,15 +717,7 @@ svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = 
extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 4) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP9]], [[TMP10]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) ATTR { @@ -1187,11 +731,7 @@ svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x2u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: @@ -1200,11 +740,7 @@ svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { @@ -1218,11 +754,7 @@ svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int6 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x2u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: @@ -1231,11 +763,7 @@ svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr 
i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { @@ -1249,11 +777,7 @@ svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int6 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x2u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: @@ -1262,11 +786,7 @@ svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { @@ -1280,15 +800,7 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) 
-// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x4u11__SVCount_tPKDhl( // CPP-CHECK-NEXT: entry: @@ -1297,15 +809,7 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 16) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP9]], [[TMP10]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) ATTR { @@ -1319,15 +823,7 @@ svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int6 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x4u11__SVCount_tPKfl( // CPP-CHECK-NEXT: entry: @@ -1336,15 +832,7 @@ svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int6 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: 
[[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 8) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP9]], [[TMP10]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) ATTR { @@ -1358,15 +846,7 @@ svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int6 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x4u11__SVCount_tPKdl( // CPP-CHECK-NEXT: entry: @@ -1375,15 +855,7 @@ svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int6 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 2) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 4) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP9]], [[TMP10]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c index 467161ccc238da..233c9b29e707a9 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_loads.c @@ -18,20 +18,12 @@ // CHECK-LABEL: @test_svld2q_u8( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 
0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svld2q_u8u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svuint8x2_t test_svld2q_u8(svbool_t pg, const uint8_t *base) {
@@ -41,20 +33,12 @@ svuint8x2_t test_svld2q_u8(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld2q_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z14test_svld2q_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
//
svint8x2_t test_svld2q_s8(svbool_t pg, const int8_t *base) {
@@ -64,21 +48,13 @@ svint8x2_t test_svld2q_s8(svbool_t pg, const int8_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x i16> [[TMP5]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z15test_svld2q_u16u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-//
CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint16x2_t test_svld2q_u16(svbool_t pg, const uint16_t *base) { @@ -89,21 +65,13 @@ svuint16x2_t test_svld2q_u16(svbool_t pg, const uint16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_s16u10__SVBool_tPKs( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint16x2_t test_svld2q_s16(svbool_t pg, const int16_t *base) { @@ -114,21 +82,13 @@ svint16x2_t test_svld2q_s16(svbool_t pg, const int16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_u32u10__SVBool_tPKj( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint32x2_t test_svld2q_u32(svbool_t pg, const uint32_t *base) { @@ -139,21 +99,13 @@ svuint32x2_t test_svld2q_u32(svbool_t pg, const uint32_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] 
= tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_s32u10__SVBool_tPKi( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint32x2_t test_svld2q_s32(svbool_t pg, const int32_t *base) { @@ -164,21 +116,13 @@ svint32x2_t test_svld2q_s32(svbool_t pg, const int32_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svuint64x2_t test_svld2q_u64(svbool_t pg, const uint64_t *base) { @@ -189,21 +133,13 @@ svuint64x2_t test_svld2q_u64(svbool_t pg, const uint64_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_s64u10__SVBool_tPKl( // 
CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svint64x2_t test_svld2q_s64(svbool_t pg, const int64_t *base) { @@ -214,21 +150,13 @@ svint64x2_t test_svld2q_s64(svbool_t pg, const int64_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat16x2_t test_svld2q_f16(svbool_t pg, const float16_t *base) { @@ -239,21 +167,13 @@ svfloat16x2_t test_svld2q_f16(svbool_t pg, const float16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svld2q_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail 
call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svbfloat16x2_t test_svld2q_bf16(svbool_t pg, const bfloat16_t *base) { @@ -264,21 +184,13 @@ svbfloat16x2_t test_svld2q_bf16(svbool_t pg, const bfloat16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat32x2_t test_svld2q_f32(svbool_t pg, const float32_t *base) { @@ -289,21 +201,13 @@ svfloat32x2_t test_svld2q_f32(svbool_t pg, const float32_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: ret [[TMP5]] +// CHECK-NEXT: ret { , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld2q_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP5]] +// CPP-CHECK-NEXT: ret { , } [[TMP1]] // svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) { @@ -317,11 +221,7 @@ svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( 
[[PG:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: @@ -330,11 +230,7 @@ svfloat64x2_t test_svld2q_f64(svbool_t pg, const float64_t *base) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -348,11 +244,7 @@ svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: ret [[TMP7]] +// CHECK-NEXT: ret { , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z19test_svld2q_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: @@ -361,11 +253,7 @@ svuint8x2_t test_svld2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP7]] +// CPP-CHECK-NEXT: ret { , } [[TMP3]] // svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -379,11 +267,7 @@ svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: @@ -393,11 +277,7 @@ svint8x2_t test_svld2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -412,11 +292,7 @@ svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: ret [[TMP8]] +// CHECK-NEXT: ret { , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: @@ -426,11 +302,7 @@ svuint16x2_t test_svld2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP8]] +// CPP-CHECK-NEXT: ret { , } [[TMP4]] // svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -445,11 +317,7 @@ svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , } @llvm.aarch64.sve.ld2q.sret.nxv4i32( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , } [[TMP4]], 
1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u32u10__SVBool_tPKjl(
 // CPP-CHECK-NEXT: entry:
@@ -459,11 +327,7 @@ svint16x2_t test_svld2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
 {
@@ -478,11 +342,7 @@ svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x i32> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s32u10__SVBool_tPKil(
 // CPP-CHECK-NEXT: entry:
@@ -492,11 +352,7 @@ svuint32x2_t test_svld2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 {
@@ -511,11 +367,7 @@ svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_u64u10__SVBool_tPKml(
 // CPP-CHECK-NEXT: entry:
@@ -525,11 +377,7 @@ svint32x2_t test_svld2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
 {
@@ -544,11 +392,7 @@ svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x i64> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_s64u10__SVBool_tPKll(
 // CPP-CHECK-NEXT: entry:
@@ -558,11 +402,7 @@ svuint64x2_t test_svld2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x i64> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 {
@@ -577,11 +417,7 @@ svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f16u10__SVBool_tPKDhl(
 // CPP-CHECK-NEXT: entry:
@@ -591,11 +427,7 @@ svint64x2_t test_svld2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
 //
 svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
 {
@@ -610,11 +442,7 @@ svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svld2q_vnum_bf16u10__SVBool_tPKu6__bf16l(
 // CPP-CHECK-NEXT: entry:
@@ -624,11 +452,7 @@ svfloat16x2_t test_svld2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
 //
 svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
 {
@@ -643,11 +467,7 @@ svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f32u10__SVBool_tPKfl(
 // CPP-CHECK-NEXT: entry:
@@ -657,11 +477,7 @@ svbfloat16x2_t test_svld2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
 //
 svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
 {
@@ -676,11 +492,7 @@ svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld2q_vnum_f64u10__SVBool_tPKdl(
 // CPP-CHECK-NEXT: entry:
@@ -690,11 +502,7 @@ svfloat32x2_t test_svld2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: ret <vscale x 4 x double> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
 //
 svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
 {
@@ -704,24 +512,12 @@ svfloat64x2_t test_svld2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v
 // CHECK-LABEL: @test_svld3q_u8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld3q_u8u10__SVBool_tPKh(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]],
ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x3_t test_svld3q_u8(svbool_t pg, const uint8_t *base)
 {
@@ -731,24 +527,12 @@ svuint8x3_t test_svld3q_u8(svbool_t pg, const uint8_t *base)
 // CHECK-LABEL: @test_svld3q_s8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld3q_s8u10__SVBool_tPKa(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP6]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svint8x3_t test_svld3q_s8(svbool_t pg, const int8_t *base)
 {
@@ -759,25 +543,13 @@ svint8x3_t test_svld3q_s8(svbool_t pg, const int8_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_u16u10__SVBool_tPKt(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 svuint16x3_t test_svld3q_u16(svbool_t pg, const uint16_t *base)
 {
@@ -788,25 +560,13 @@ svuint16x3_t test_svld3q_u16(svbool_t pg, const uint16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_s16u10__SVBool_tPKs(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 svint16x3_t test_svld3q_s16(svbool_t pg, const int16_t *base)
 {
@@ -817,25 +577,13 @@ svint16x3_t test_svld3q_s16(svbool_t pg, const int16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_u32u10__SVBool_tPKj(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 svuint32x3_t test_svld3q_u32(svbool_t pg, const uint32_t *base)
 {
@@ -846,25 +594,13 @@ svuint32x3_t test_svld3q_u32(svbool_t pg, const uint32_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_s32u10__SVBool_tPKi(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 svint32x3_t test_svld3q_s32(svbool_t pg, const int32_t *base)
 {
@@ -875,25 +611,13 @@ svint32x3_t test_svld3q_s32(svbool_t pg, const int32_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_u64u10__SVBool_tPKm(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
 //
 svuint64x3_t test_svld3q_u64(svbool_t pg, const uint64_t *base)
 {
@@ -904,25 +628,13 @@ svuint64x3_t test_svld3q_u64(svbool_t pg, const uint64_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_s64u10__SVBool_tPKl(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]]
 //
 svint64x3_t test_svld3q_s64(svbool_t pg, const int64_t *base)
 {
@@ -933,25 +645,13 @@ svint64x3_t test_svld3q_s64(svbool_t pg, const int64_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
[[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x half> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_f16u10__SVBool_tPKDh(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x half> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]]
 //
 svfloat16x3_t test_svld3q_f16(svbool_t pg, const float16_t *base)
 {
@@ -962,25 +662,13 @@ svfloat16x3_t test_svld3q_f16(svbool_t pg, const float16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svld3q_bf16u10__SVBool_tPKu6__bf16(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]]
 //
 svbfloat16x3_t test_svld3q_bf16(svbool_t pg, const bfloat16_t *base)
 {
@@ -991,25 +679,13 @@ svbfloat16x3_t test_svld3q_bf16(svbool_t pg, const bfloat16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x float> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_f32u10__SVBool_tPKf(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x float> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]]
 //
 svfloat32x3_t test_svld3q_f32(svbool_t pg, const float32_t *base)
 {
@@ -1020,25 +696,13 @@ svfloat32x3_t test_svld3q_f32(svbool_t pg, const float32_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x double> [[TMP7]]
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld3q_f64u10__SVBool_tPKd(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x double> [[TMP7]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]]
 //
 svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base)
 {
@@ -1052,13 +716,7 @@ svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP4]], i64 0)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 16)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_u8u10__SVBool_tPKhl(
 // CPP-CHECK-NEXT: entry:
@@ -1067,13 +725,7 @@ svfloat64x3_t test_svld3q_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 {
@@ -1087,13 +739,7 @@ svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP4]], i64 0)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 16)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 32)
-// CHECK-NEXT: ret <vscale x 48 x i8> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 // CPP-CHECK-LABEL: @_Z19test_svld3q_vnum_s8u10__SVBool_tPKal(
 // CPP-CHECK-NEXT: entry:
@@ -1102,13 +748,7 @@ svuint8x3_t test_svld3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 0
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> poison, <vscale x 16 x i8> [[TMP4]], i64 0)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 1
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], 2
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 32)
-// CPP-CHECK-NEXT: ret <vscale x 48 x i8> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
 //
 svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 {
@@ -1123,13 +763,7 @@ svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 8)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u16u10__SVBool_tPKtl(
 // CPP-CHECK-NEXT: entry:
@@ -1139,13 +773,7 @@ svint8x3_t test_svld3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
 //
 svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
 {
@@ -1160,13 +788,7 @@ svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 8)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x i16> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
 //
 // CPP-CHECK-LABEL:
@_Z20test_svld3q_vnum_s16u10__SVBool_tPKsl(
 // CPP-CHECK-NEXT: entry:
@@ -1176,13 +798,7 @@ svuint16x3_t test_svld3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x i16> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP4]]
 //
 svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 {
@@ -1197,13 +813,7 @@ svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u32u10__SVBool_tPKjl(
 // CPP-CHECK-NEXT: entry:
@@ -1213,13 +823,7 @@ svint16x3_t test_svld3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
 {
@@ -1234,13 +838,7 @@ svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x i32> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s32u10__SVBool_tPKil(
 // CPP-CHECK-NEXT: entry:
@@ -1250,13 +848,7 @@ svuint32x3_t test_svld3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x i32> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]]
 //
 svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 {
@@ -1271,13 +863,7 @@ svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_u64u10__SVBool_tPKml(
 // CPP-CHECK-NEXT: entry:
@@ -1287,13 +873,7 @@ svint32x3_t test_svld3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
 {
@@ -1308,13 +888,7 @@ svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x i64> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_s64u10__SVBool_tPKll(
 // CPP-CHECK-NEXT: entry:
@@ -1324,13 +898,7 @@ svuint64x3_t test_svld3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3q.sret.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x i64> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]]
 //
 svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 {
@@ -1345,13 +913,7 @@ svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP8]], <vscale x 8 x half> [[TMP9]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x half> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f16u10__SVBool_tPKDhl(
 // CPP-CHECK-NEXT: entry:
@@ -1361,13 +923,7 @@ svint64x3_t test_svld3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3q.sret.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> poison, <vscale x 8 x half> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP8]], <vscale x 8 x half> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x half> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP4]]
 //
 svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
 {
@@ -1382,13 +938,7 @@ svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z21test_svld3q_vnum_bf16u10__SVBool_tPKu6__bf16l(
 // CPP-CHECK-NEXT: entry:
@@ -1398,13 +948,7 @@ svfloat16x3_t test_svld3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3q.sret.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 24 x bfloat> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]]
 //
 svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
 {
@@ -1419,13 +963,7 @@ svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
-// CHECK-NEXT: ret <vscale x 12 x float> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f32u10__SVBool_tPKfl(
 // CPP-CHECK-NEXT: entry:
@@ -1435,13 +973,7 @@ svbfloat16x3_t test_svld3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3q.sret.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 4)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 8)
-// CPP-CHECK-NEXT: ret <vscale x 12 x float> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]]
 //
 svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
 {
@@ -1456,13 +988,7 @@ svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v
 // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 2
-// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 4)
-// CHECK-NEXT: ret <vscale x 6 x double> [[TMP10]]
+// CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
 //
 // CPP-CHECK-LABEL: @_Z20test_svld3q_vnum_f64u10__SVBool_tPKdl(
 // CPP-CHECK-NEXT: entry:
@@ -1472,13 +998,7 @@ svfloat32x3_t test_svld3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v
 // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3q.sret.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP3]])
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 0
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> poison, <vscale x 2 x double> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 1
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]], 2
-// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 4)
-// CPP-CHECK-NEXT: ret <vscale x 6 x double> [[TMP10]]
+// CPP-CHECK-NEXT: ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP4]]
 //
 svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
 {
@@ -1488,28 +1008,12 @@ svfloat64x3_t test_svld3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t v
 // CHECK-LABEL: @test_svld4q_u8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svld4q_u8u10__SVBool_tPKh(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4q.sret.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
-// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
 //
 svuint8x4_t test_svld4q_u8(svbool_t pg, const uint8_t *base)
 {
@@ -1520,29 +1024,13 @@ svuint8x4_t test_svld4q_u8(svbool_t pg, const uint8_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
-// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld4q_u16u10__SVBool_tPKt(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 svuint16x4_t test_svld4q_u16(svbool_t pg, const uint16_t *base)
 {
@@ -1553,29 +1041,13 @@ svuint16x4_t test_svld4q_u16(svbool_t pg, const uint16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
-// CHECK-NEXT: ret <vscale x 32 x i16> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld4q_s16u10__SVBool_tPKs(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4q.sret.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]]
 //
 svint16x4_t test_svld4q_s16(svbool_t pg, const int16_t *base)
 {
@@ -1586,29 +1058,13 @@ svint16x4_t test_svld4q_s16(svbool_t pg, const int16_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
-// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld4q_u32u10__SVBool_tPKj(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
-// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
-// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP9]]
+// CPP-CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 svuint32x4_t test_svld4q_u32(svbool_t pg, const uint32_t *base)
 {
@@ -1619,29 +1075,13 @@ svuint32x4_t test_svld4q_u32(svbool_t pg, const uint32_t *base)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
-// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
-// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
-// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
-// CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
-// CHECK-NEXT: ret <vscale x 16 x i32> [[TMP9]]
+// CHECK-NEXT: ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svld4q_s32u10__SVBool_tPKi(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4q.sret.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint32x4_t test_svld4q_s32(svbool_t pg, const int32_t *base) { @@ -1652,29 +1092,13 @@ svint32x4_t test_svld4q_s32(svbool_t pg, const int32_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld4q_u64u10__SVBool_tPKm( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svuint64x4_t test_svld4q_u64(svbool_t pg, const uint64_t *base) { @@ -1685,29 +1109,13 @@ svuint64x4_t test_svld4q_u64(svbool_t pg, const uint64_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: 
[[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld4q_s64u10__SVBool_tPKl( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svint64x4_t test_svld4q_s64(svbool_t pg, const int64_t *base) { @@ -1718,29 +1126,13 @@ svint64x4_t test_svld4q_s64(svbool_t pg, const int64_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld4q_f16u10__SVBool_tPKDh( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat16x4_t test_svld4q_f16(svbool_t pg, const float16_t *base) { @@ -1751,29 +1143,13 @@ svfloat16x4_t test_svld4q_f16(svbool_t pg, const float16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z16test_svld4q_bf16u10__SVBool_tPKu6__bf16( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svbfloat16x4_t test_svld4q_bf16(svbool_t pg, const bfloat16_t *base) { @@ -1784,29 +1160,13 @@ svbfloat16x4_t test_svld4q_bf16(svbool_t pg, const bfloat16_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) -// CHECK-NEXT: ret [[TMP9]] +// 
CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld4q_f32u10__SVBool_tPKf( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat32x4_t test_svld4q_f32(svbool_t pg, const float32_t *base) { @@ -1817,29 +1177,13 @@ svfloat32x4_t test_svld4q_f32(svbool_t pg, const float32_t *base) // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) -// CHECK-NEXT: ret [[TMP9]] +// CHECK-NEXT: ret { , , , } [[TMP1]] // // CPP-CHECK-LABEL: @_Z15test_svld4q_f64u10__SVBool_tPKd( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[BASE:%.*]]) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP9]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP1]] // svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) { @@ -1853,15 +1197,7 @@ svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) // 
CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_u8u10__SVBool_tPKhl( // CPP-CHECK-NEXT: entry: @@ -1870,15 +1206,7 @@ svfloat64x4_t test_svld4q_f64(svbool_t pg, const float64_t *base) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) { @@ -1892,15 +1220,7 @@ svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CHECK-NEXT: ret [[TMP11]] +// CHECK-NEXT: ret { , , , } [[TMP3]] // // CPP-CHECK-LABEL: @_Z19test_svld4q_vnum_s8u10__SVBool_tPKal( // CPP-CHECK-NEXT: entry: @@ 
-1909,15 +1229,7 @@ svuint8x4_t test_svld4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] // CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv16i8( [[PG:%.*]], ptr [[TMP2]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP3]], 0 -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP3]], 1 -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 16) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP3]], 2 -// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 32) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP3]], 3 -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP9]], [[TMP10]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP11]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP3]] // svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) { @@ -1931,15 +1243,7 @@ svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u16u10__SVBool_tPKtl( // CPP-CHECK-NEXT: entry: @@ -1949,15 +1253,7 @@ svint8x4_t test_svld4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) 
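(All of the ld3q/ld4q hunks above and below delete the same epilogue: the struct value produced by the ld3q/ld4q sret intrinsic — e.g. { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } for the 16-bit cases — is now returned as the builtin's result directly, so the extractvalue/llvm.vector.insert chain that used to concatenate the parts into one wide scalable vector disappears. In the vnum forms only the surviving address computation is kept: vnum scaled by a vscale-derived byte count feeding a getelementptr i8, which this patch does not touch. A minimal sketch of the source pattern these tests exercise — hypothetical, not lifted from the test file, assuming SVE2.1 is enabled (e.g. -march=armv9-a+sve2p1) and using only declarations from <arm_sve.h>:

#include <arm_sve.h>

// Hypothetical example: load four quadword-interleaved vectors and return
// the third. With the struct-based tuple lowering, svld4q_f64 is a single
// call to the ld4q sret intrinsic and svget4_f64 picks one element out of
// the returned tuple, with no wide concatenated vector in between.
svfloat64_t get_third(svbool_t pg, const float64_t *base) {
  svfloat64x4_t tuple = svld4q_f64(pg, base);
  return svget4_f64(tuple, 2);
}
)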
-// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) { @@ -1972,15 +1268,7 @@ svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s16u10__SVBool_tPKsl( // CPP-CHECK-NEXT: entry: @@ -1990,15 +1278,7 @@ svuint16x4_t test_svld4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8i16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) { @@ -2013,15 +1293,7 @@ svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u32u10__SVBool_tPKjl( // CPP-CHECK-NEXT: entry: @@ -2031,15 +1303,7 @@ svint16x4_t test_svld4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) { @@ -2054,15 +1318,7 @@ svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s32u10__SVBool_tPKil( // CPP-CHECK-NEXT: entry: @@ -2072,15 +1328,7 @@ svuint32x4_t test_svld4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4i32( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = 
extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) { @@ -2095,15 +1343,7 @@ svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_u64u10__SVBool_tPKml( // CPP-CHECK-NEXT: entry: @@ -2113,15 +1353,7 @@ svint32x4_t test_svld4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) { @@ -2136,15 +1368,7 @@ svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// 
CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_s64u10__SVBool_tPKll( // CPP-CHECK-NEXT: entry: @@ -2154,15 +1378,7 @@ svuint64x4_t test_svld4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnu // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2i64( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) { @@ -2177,15 +1393,7 @@ svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f16u10__SVBool_tPKDhl( // CPP-CHECK-NEXT: entry: @@ -2195,15 +1403,7 @@ svint64x4_t test_svld4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8f16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) { @@ -2218,15 +1418,7 @@ svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z21test_svld4q_vnum_bf16u10__SVBool_tPKu6__bf16l( // CPP-CHECK-NEXT: entry: @@ -2236,15 +1428,7 @@ svfloat16x4_t test_svld4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t v // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv8bf16( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) { @@ -2259,15 +1443,7 @@ svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_ // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // 
CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f32u10__SVBool_tPKfl( // CPP-CHECK-NEXT: entry: @@ -2277,15 +1453,7 @@ svbfloat16x4_t test_svld4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_ // CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv4f32( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) { @@ -2300,15 +1468,7 @@ svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v // CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[TMP3]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z20test_svld4q_vnum_f64u10__SVBool_tPKdl( // CPP-CHECK-NEXT: entry: @@ -2318,15 +1478,7 @@ svfloat32x4_t test_svld4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t v // CPP-CHECK-NEXT: 
[[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] // CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld4q.sret.nxv2f64( [[TMP0]], ptr [[TMP3]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svfloat64x4_t test_svld4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c index 642c5f2b88979e..6cea34ee52ef6d 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -141,21 +141,27 @@ svbool_t test_svpext_lane_c64_3(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_0( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 0); @@ -163,21 +169,27 @@ svboolx2_t test_svpext_lane_c8_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c8_x2_1( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: 
[[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { , } [[TMP5]] // // CPP-CHECK-LABEL: @_Z24test_svpext_lane_c8_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv16i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { return svpext_lane_c8_x2(c, 1); @@ -185,6 +197,7 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_0( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -192,10 +205,13 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { , } [[TMP7]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -203,7 +219,9 @@ svboolx2_t test_svpext_lane_c8_x2_1(svcount_t c) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 0); @@ -211,6 +229,7 @@ svboolx2_t 
test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c16_x2_1( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -218,10 +237,13 @@ svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { , } [[TMP7]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c16_x2_1u11__SVCount_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv8i1(target("aarch64.svcount") [[C:%.*]], i32 1) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -229,7 +251,9 @@ svboolx2_t test_svpext_lane_c16_x2_0(svcount_t c) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { return svpext_lane_c16_x2(c, 1); @@ -237,6 +261,7 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { // CHECK-LABEL: @test_svpext_lane_c32_x2_0( // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -244,10 +269,13 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { , } [[TMP7]] // // CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_0u11__SVCount_t( // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 8 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -255,7 +283,9 @@ svboolx2_t 
@@ -255,7 +283,9 @@ svboolx2_t test_svpext_lane_c16_x2_1(svcount_t c) ATTR {
// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR {
  return svpext_lane_c32_x2(c, 0);
@@ -263,6 +293,7 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR {
// CHECK-LABEL: @test_svpext_lane_c32_x2_1(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
@@ -270,10 +301,13 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR {
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c32_x2_1u11__SVCount_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.pext.x2.nxv4i1(target("aarch64.svcount") [[C:%.*]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
@@ -281,7 +315,9 @@ svboolx2_t test_svpext_lane_c32_x2_0(svcount_t c) ATTR {
// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP4]])
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR {
  return svpext_lane_c32_x2(c, 1);
@@ -289,6 +325,7 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR {
// CHECK-LABEL: @test_svpext_lane_c64_x2_0(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
@@ -296,10 +333,13 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR {
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_0u11__SVCount_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
@@ -307,7 +347,9 @@ svboolx2_t test_svpext_lane_c32_x2_1(svcount_t c) ATTR {
// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR {
  return svpext_lane_c64_x2(c, 0);
@@ -315,6 +357,7 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR {
// CHECK-LABEL: @test_svpext_lane_c64_x2_1(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
@@ -322,10 +365,13 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR {
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
// CPP-CHECK-LABEL: @_Z25test_svpext_lane_c64_x2_1u11__SVCount_t(
// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 8
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.pext.x2.nxv2i1(target("aarch64.svcount") [[C:%.*]], i32 1)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
@@ -333,7 +379,9 @@ svboolx2_t test_svpext_lane_c64_x2_0(svcount_t c) ATTR {
// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } [[TMP0]], 1
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP4]])
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP6]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 8
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
//
svboolx2_t test_svpext_lane_c64_x2_1(svcount_t c) ATTR {
  return svpext_lane_c64_x2(c, 1);
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c
index df9e90ea119f80..5ab666117a1033 100644
---
a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qcvtn.c @@ -29,17 +29,13 @@ // CHECK-LABEL: @test_qcvtn_s16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_s16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_s16,_s32_x2,,)(zn); @@ -47,17 +43,13 @@ svint16_t test_qcvtn_s16_s32_x2(svint32x2_t zn) ATTR { // CHECK-LABEL: @test_qcvtn_u16_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_u16_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqcvtn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_u16,_u32_x2,,)(zn); @@ -65,17 +57,13 @@ svuint16_t test_qcvtn_u16_u32_x2(svuint32x2_t zn) ATTR { // CHECK-LABEL: @test_qcvtn_u16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z21test_qcvtn_u16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( 
[[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x2.nxv4i32( [[TMP0]], [[TMP1]]) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqcvtun.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_qcvtn_u16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqcvtn_u16,_s32_x2,,)(zn); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c index c35194e32479ea..f95b22fdaeaf48 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_qrshr.c @@ -28,17 +28,13 @@ // CHECK-LABEL: @test_svqrshrn_s16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svqrshrn_s16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svint16_t test_svqrshrn_s16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqrshrn,_n,_s16,_s32_x2,)(zn, 16); @@ -48,17 +44,13 @@ svint16_t test_svqrshrn_s16_s32_x2(svint32x2_t zn) ATTR { // CHECK-LABEL: @test_svqrshrn_u16_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z24test_svqrshrn_u16_u32_x212svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uqrshrn.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svqrshrn_u16_u32_x2(svuint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqrshrn,_n,_u16,_u32_x2,)(zn, 16); @@ -68,17 +60,13 @@ svuint16_t test_svqrshrn_u16_u32_x2(svuint32x2_t zn) ATTR { // CHECK-LABEL: @test_svqrshrun_u16_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CHECK-NEXT: ret [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CHECK-NEXT: ret [[TMP0]] // // CPP-CHECK-LABEL: @_Z25test_svqrshrun_u16_s32_x211svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x2.nxv4i32( [[TMP0]], [[TMP1]], i32 16) -// CPP-CHECK-NEXT: ret [[TMP2]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sqrshrun.x2.nxv4i32( [[ZN_COERCE0:%.*]], [[ZN_COERCE1:%.*]], i32 16) +// CPP-CHECK-NEXT: ret [[TMP0]] // svuint16_t test_svqrshrun_u16_s32_x2(svint32x2_t zn) ATTR { return SVE_ACLE_FUNC(svqrshrun,_n,_u16,_s32_x2,)(zn, 16); diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c index dafad8d4c7ab6e..67e32a88aae743 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set2_bool.c @@ -33,13 +33,17 @@ // CHECK-LABEL: @test_svset2_b_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_b_010svboolx2_tu10__SVBool_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svboolx2_t test_svset2_b_0(svboolx2_t tuple, svbool_t x) ATTR { @@ -48,13 +52,17 @@ svboolx2_t test_svset2_b_0(svboolx2_t tuple, svbool_t x) ATTR // CHECK-LABEL: @test_svset2_b_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , } [[TMP2]] // // CPP-CHECK-LABEL: @_Z15test_svset2_b_110svboolx2_tu10__SVBool_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
insertvalue { , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , } [[TMP1]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , } [[TMP2]] // svboolx2_t test_svset2_b_1(svboolx2_t tuple, svbool_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c index 272b16f9d9a561..6e4016d8e120ef 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_set4_bool.c @@ -32,13 +32,21 @@ // CHECK-LABEL: @test_svset4_b_0( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_b_010svboolx4_tu10__SVBool_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 0) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 0 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svboolx4_t test_svset4_b_0(svboolx4_t tuple, svbool_t x) ATTR { @@ -47,13 +55,21 @@ svboolx4_t test_svset4_b_0(svboolx4_t tuple, svbool_t x) ATTR // CHECK-LABEL: @test_svset4_b_1( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_b_110svboolx4_tu10__SVBool_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 1 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svboolx4_t 
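// These svset4 checks show the write side of the same change: replacing one
// field of a predicate tuple is now a plain struct insertvalue at the field
// index (0, 1 or 3 above), where it used to be an @llvm.vector.insert at a
// lane offset (i64 0, 16 or 48). A minimal sketch of the pattern under test
// (the wrapper name `replace_field` is illustrative and the svset4_b spelling
// is assumed from the ACLE; assumes <arm_sve.h>):
//
//   svboolx4_t replace_field(svboolx4_t tuple, svbool_t x) {
//     // Lowers to one insertvalue into element 3 of the four-predicate
//     // struct, with no repacking of the other three fields.
//     return svset4_b(tuple, 3, x);
//   }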
test_svset4_b_1(svboolx4_t tuple, svbool_t x) ATTR { @@ -62,13 +78,21 @@ svboolx4_t test_svset4_b_1(svboolx4_t tuple, svbool_t x) ATTR // CHECK-LABEL: @test_svset4_b_3( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 48) -// CHECK-NEXT: ret [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CHECK-NEXT: ret { , , , } [[TMP4]] // // CPP-CHECK-LABEL: @_Z15test_svset4_b_310svboolx4_tu10__SVBool_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv64i1.nxv16i1( [[TUPLE:%.*]], [[X:%.*]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP0]] +// CPP-CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[TUPLE_COERCE0:%.*]], 0 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[TUPLE_COERCE1:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[TUPLE_COERCE2:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[TUPLE_COERCE3:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = insertvalue { , , , } [[TMP3]], [[X:%.*]], 3 +// CPP-CHECK-NEXT: ret { , , , } [[TMP4]] // svboolx4_t test_svset4_b_3(svboolx4_t tuple, svbool_t x) ATTR { diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c index c1252ed7335cfa..9db3e5e98975a1 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c @@ -23,16 +23,12 @@ // CHECK-LABEL: @test_svst1_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst1_u8_x2u11__SVCount_tPh11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR @@ -42,16 +38,12 @@ void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR // CHECK-LABEL: @test_svst1_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u16_x2u11__SVCount_tPt12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR @@ -61,16 +53,12 @@ void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR // CHECK-LABEL: @test_svst1_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u32_x2u11__SVCount_tPj12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR @@ -80,16 +68,12 @@ void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR // CHECK-LABEL: @test_svst1_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u64_x2u11__SVCount_tPm12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], 
[[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR @@ -99,20 +83,12 @@ void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR // CHECK-LABEL: @test_svst1_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst1_u8_x4u11__SVCount_tPh11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR @@ -122,20 +98,12 @@ void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR // CHECK-LABEL: @test_svst1_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u16_x4u11__SVCount_tPt12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// 
CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR @@ -145,20 +113,12 @@ void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR // CHECK-LABEL: @test_svst1_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u32_x4u11__SVCount_tPj12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR @@ -168,20 +128,12 @@ void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR // CHECK-LABEL: @test_svst1_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_u64_x4u11__SVCount_tPm12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR @@ -191,16 +143,12 @@ void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR // CHECK-LABEL: @test_svst1_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst1_s8_x2u11__SVCount_tPa10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR @@ -210,16 +158,12 @@ void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR // CHECK-LABEL: @test_svst1_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s16_x2u11__SVCount_tPs11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void 
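// Throughout these st1 tests the tuple argument `v` now arrives as its
// already-split parts ([[V_COERCE0]], [[V_COERCE1]], ...), which are passed
// straight to the @llvm.aarch64.sve.st1.pn.x2/x4 intrinsics; the old run of
// @llvm.vector.extract calls at lane offsets is gone. A minimal sketch of the
// source pattern (the wrapper name `store_pair` is illustrative; assumes
// <arm_sve.h>):
//
//   void store_pair(svcount_t pn, int16_t *base, svint16x2_t v) {
//     // The two <vscale x 8 x i16> halves feed
//     // llvm.aarch64.sve.st1.pn.x2.nxv8i16 with no re-packing.
//     svst1_s16_x2(pn, base, v);
//   }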
test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR @@ -229,16 +173,12 @@ void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR // CHECK-LABEL: @test_svst1_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s32_x2u11__SVCount_tPi11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) ATTR @@ -248,16 +188,12 @@ void test_svst1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) ATTR // CHECK-LABEL: @test_svst1_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s64_x2u11__SVCount_tPl11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR @@ -267,20 +203,12 @@ void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR // CHECK-LABEL: @test_svst1_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: 
tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst1_s8_x4u11__SVCount_tPa10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR @@ -290,20 +218,12 @@ void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR // CHECK-LABEL: @test_svst1_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s16_x4u11__SVCount_tPs11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR @@ -313,20 +233,12 @@ void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR // CHECK-LABEL: @test_svst1_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s32_x4u11__SVCount_tPi11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) ATTR @@ -336,20 +248,12 @@ void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) ATTR // CHECK-LABEL: @test_svst1_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_s64_x4u11__SVCount_tPl11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR @@ -359,16 +263,12 @@ void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR // CHECK-LABEL: @test_svst1_f16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// 
CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f16_x2u11__SVCount_tPDh13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR @@ -378,16 +278,12 @@ void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR // CHECK-LABEL: @test_svst1_f32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f32_x2u11__SVCount_tPf13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR @@ -397,16 +293,12 @@ void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR // CHECK-LABEL: @test_svst1_f64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f64_x2u11__SVCount_tPd13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CPP-CHECK-NEXT: tail call 
void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR @@ -416,20 +308,12 @@ void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR // CHECK-LABEL: @test_svst1_f16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f16_x4u11__SVCount_tPDh13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR @@ -439,20 +323,12 @@ void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR // CHECK-LABEL: @test_svst1_f32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f32_x4u11__SVCount_tPf13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) -// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR @@ -462,20 +338,12 @@ void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR // CHECK-LABEL: @test_svst1_f64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z17test_svst1_f64_x4u11__SVCount_tPd13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR @@ -489,24 +357,20 @@ void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR // CHECK-LABEL: @test_svst1_vnum_u8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 
[[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) ATTR @@ -516,24 +380,20 @@ void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_ // CHECK-LABEL: @test_svst1_vnum_u16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// 
CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) ATTR @@ -543,24 +403,20 @@ void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16 // CHECK-LABEL: @test_svst1_vnum_u32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) ATTR @@ -570,24 +426,20 @@ void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32 // CHECK-LABEL: @test_svst1_vnum_u64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// 
CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) ATTR @@ -597,28 +449,20 @@ void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64 // CHECK-LABEL: @test_svst1_vnum_u8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) ATTR @@ -628,28 +472,20 @@ void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_ // CHECK-LABEL: @test_svst1_vnum_u16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) ATTR @@ -659,28 +495,20 @@ void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16 // CHECK-LABEL: @test_svst1_vnum_u32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], 
[[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) ATTR @@ -690,28 +518,20 @@ void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32 // CHECK-LABEL: @test_svst1_vnum_u64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) ATTR @@ -721,24 +541,20 @@ void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64 // CHECK-LABEL: @test_svst1_vnum_s8_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 
16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) ATTR @@ -748,24 +564,20 @@ void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t // CHECK-LABEL: @test_svst1_vnum_s16_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) ATTR @@ -775,24 +587,20 @@ void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2 // CHECK-LABEL: @test_svst1_vnum_s32_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, 
svint32x2_t v) ATTR @@ -802,24 +610,20 @@ void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2 // CHECK-LABEL: @test_svst1_vnum_s64_x2( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) ATTR @@ -829,28 +633,20 @@ void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2 // CHECK-LABEL: @test_svst1_vnum_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: 
[[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) ATTR @@ -860,28 +656,20 @@ void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t // CHECK-LABEL: @test_svst1_vnum_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) ATTR @@ -891,28 +679,20 @@ void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4 // CHECK-LABEL: @test_svst1_vnum_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// 
CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) ATTR @@ -922,28 +702,20 @@ void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4 // CHECK-LABEL: @test_svst1_vnum_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) ATTR @@ -954,25 +726,21 @@ void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4 // CHECK-LABEL: @test_svst1_vnum_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) ATTR @@ -983,25 +751,21 @@ void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svflo // CHECK-LABEL: @test_svst1_vnum_f32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr 
[[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) ATTR @@ -1012,25 +776,21 @@ void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svflo // CHECK-LABEL: @test_svst1_vnum_f64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) ATTR @@ -1041,29 +801,21 @@ void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svflo // CHECK-LABEL: @test_svst1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) 
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) ATTR
@@ -1074,29 +826,21 @@ void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svflo
 // CHECK-LABEL: @test_svst1_vnum_f32_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) ATTR
@@ -1107,29 +851,21 @@ void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svflo
 // CHECK-LABEL: @test_svst1_vnum_f64_x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t(
 // CPP-CHECK-NEXT: entry:
 // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) ATTR
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
index 2db3220e5fe063..ed1959327a6115 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
@@ -24,16 +24,12 @@
 // CHECK-LABEL: @test_svstnt1_u8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x2u11__SVCount_tPh11svuint8x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR
@@ -44,16 +40,12 @@ void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x2u11__SVCount_tPt12svuint16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR
@@ -64,16 +56,12 @@ void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x2u11__SVCount_tPj12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR
@@ -84,16 +72,12 @@ void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x2u11__SVCount_tPm12svuint64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR
@@ -104,20 +88,12 @@ void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x4u11__SVCount_tPh11svuint8x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR
@@ -128,20 +104,12 @@ void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x4u11__SVCount_tPt12svuint16x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR
@@ -152,20 +120,12 @@ void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x4u11__SVCount_tPj12svuint32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR
@@ -176,20 +136,12 @@ void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_u64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x4u11__SVCount_tPm12svuint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR
@@ -200,16 +152,12 @@ void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x2u11__SVCount_tPa10svint8x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR
@@ -220,16 +168,12 @@ void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x2u11__SVCount_tPs11svint16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR
@@ -240,16 +184,12 @@ void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x2u11__SVCount_tPi11svint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) ATTR
@@ -260,16 +200,12 @@ void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x2u11__SVCount_tPl11svint64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR
@@ -280,20 +216,12 @@ void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x4u11__SVCount_tPa10svint8x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR
@@ -304,20 +232,12 @@ void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x4u11__SVCount_tPs11svint16x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR
@@ -328,20 +248,12 @@ void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x4u11__SVCount_tPi11svint32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) ATTR
@@ -352,20 +264,12 @@ void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_s64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x4u11__SVCount_tPl11svint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR
@@ -376,16 +280,12 @@ void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x2u11__SVCount_tPDh13svfloat16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR
@@ -396,16 +296,12 @@ void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x2u11__SVCount_tPf13svfloat32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR
@@ -416,16 +312,12 @@ void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x2u11__SVCount_tPd13svfloat64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR
@@ -436,20 +328,12 @@ void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x4u11__SVCount_tPDh13svfloat16x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR
@@ -460,20 +344,12 @@ void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x4u11__SVCount_tPf13svfloat32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR
@@ -484,20 +360,12 @@ void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_f64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x4u11__SVCount_tPd13svfloat64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR
@@ -510,24 +378,20 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR
 // CHECK-LABEL: @test_svstnt1_vnum_u8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) ATTR
@@ -538,24 +402,20 @@ void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x
 // CHECK-LABEL: @test_svstnt1_vnum_u16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) ATTR
@@ -566,24 +426,20 @@ void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) ATTR
@@ -594,24 +450,20 @@ void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u64_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) ATTR
@@ -622,28 +474,20 @@ void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u8_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) ATTR
@@ -654,28 +498,20 @@ void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x
 // CHECK-LABEL: @test_svstnt1_vnum_u16_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) ATTR
@@ -686,28 +522,20 @@ void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u32_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) ATTR
@@ -718,28 +546,20 @@ void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_u64_x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6)
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) ATTR
@@ -750,24 +570,20 @@ void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint
 // CHECK-LABEL: @test_svstnt1_vnum_s8_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) ATTR
@@ -778,24 +594,20 @@ void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_
 // CHECK-LABEL: @test_svstnt1_vnum_s16_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) ATTR
@@ -806,24 +618,20 @@ void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16
 // CHECK-LABEL: @test_svstnt1_vnum_s32_x2(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL:
@_Z24test_svstnt1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) ATTR @@ -862,28 +666,20 @@ void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CHECK-LABEL: @test_svstnt1_vnum_s8_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) ATTR @@ -894,28 +690,20 @@ void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_ // CHECK-LABEL: @test_svstnt1_vnum_s16_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], 
[[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) ATTR @@ -926,28 +714,20 @@ void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16 // CHECK-LABEL: @test_svstnt1_vnum_s32_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) ATTR @@ -958,28 +738,20 @@ void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32 // CHECK-LABEL: @test_svstnt1_vnum_s64_x4( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], 
i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[VNUM:%.*]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) ATTR @@ -991,25 +763,21 @@ void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64 // CHECK-LABEL: @test_svstnt1_vnum_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: 
[[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) ATTR @@ -1021,25 +789,21 @@ void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_f32_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], 
[[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) ATTR @@ -1051,25 +815,21 @@ void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_f64_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP3]], [[TMP2]] -// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[TMP0]], [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP4]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) ATTR @@ -1081,29 +841,21 @@ void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svf // CHECK-LABEL: 
@test_svstnt1_vnum_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) ATTR @@ -1115,29 +867,21 @@ void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_f32_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) -// CHECK-NEXT: 
[[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) ATTR @@ -1149,29 +893,21 @@ void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svf // CHECK-LABEL: @test_svstnt1_vnum_f64_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[CONV]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP5]], [[TMP4]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[CONV]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64( [[V_COERCE0:%.*]], [[V_COERCE1:%.*]], [[V_COERCE2:%.*]], [[V_COERCE3:%.*]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) ATTR diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c index 1def0289c12ae4..657787e851ee20 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_store.c @@ -16,16 +16,12 @@ // CHECK-LABEL: @test_svst2q_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2q_u8u10__SVBool_tPKh11svuint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], 
[[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt) @@ -35,16 +31,12 @@ void test_svst2q_u8(svbool_t pg, const uint8_t *base, svuint8x2_t zt) // CHECK-LABEL: @test_svst2q_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst2q_s8u10__SVBool_tPKa10svint8x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt) @@ -53,18 +45,14 @@ void test_svst2q_s8(svbool_t pg, const int8_t *base, svint8x2_t zt) } // CHECK-LABEL: @test_svst2q_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_u16u10__SVBool_tPKt12svuint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt) @@ -74,18 +62,14 @@ void test_svst2q_u16(svbool_t pg, const uint16_t *base, svuint16x2_t zt) // CHECK-LABEL: @test_svst2q_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_s16u10__SVBool_tPKs11svint16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt) @@ -95,18 +79,14 @@ void test_svst2q_s16(svbool_t pg, const int16_t *base, svint16x2_t zt) // CHECK-LABEL: @test_svst2q_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_u32u10__SVBool_tPKj12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt) @@ -116,18 +96,14 @@ void test_svst2q_u32(svbool_t pg, const uint32_t *base, svuint32x2_t zt) // CHECK-LABEL: @test_svst2q_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_s32u10__SVBool_tPKi11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt) @@ -137,18 +113,14 @@ void test_svst2q_s32(svbool_t pg, const int32_t *base, svint32x2_t zt) // CHECK-LABEL: @test_svst2q_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_u64u10__SVBool_tPKm12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt) @@ -158,18 +130,14 @@ void test_svst2q_u64(svbool_t pg, const uint64_t *base, svuint64x2_t zt) // CHECK-LABEL: @test_svst2q_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_s64u10__SVBool_tPKl11svint64x2_t( // CPP-CHECK-NEXT: entry: 
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt) @@ -179,18 +147,14 @@ void test_svst2q_s64(svbool_t pg, const int64_t *base, svint64x2_t zt) // CHECK-LABEL: @test_svst2q_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst2q_f16u10__SVBool_tPKDh13svfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt) @@ -200,18 +164,14 @@ void test_svst2q_f16(svbool_t pg, const float16_t *base, svfloat16x2_t zt) // CHECK-LABEL: @test_svst2q_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst2q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( 
[[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt)
@@ -221,18 +181,14 @@ void test_svst2q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x2_t zt)
// CHECK-LABEL: @test_svst2q_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_f32u10__SVBool_tPKf13svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt)
@@ -242,18 +198,14 @@ void test_svst2q_f32(svbool_t pg, const float32_t *base, svfloat32x2_t zt)
// CHECK-LABEL: @test_svst2q_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst2q_f64u10__SVBool_tPKd13svfloat64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt)
@@ -263,24 +215,20 @@ void test_svst2q_f64(svbool_t pg, const float64_t *base, svfloat64x2_t zt)
// CHECK-LABEL: @test_svst2q_vnum_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP3]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_u8u10__SVBool_tPKhl11svuint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP3]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x2_t zt)
@@ -290,24 +238,20 @@ void test_svst2q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8
// CHECK-LABEL: @test_svst2q_vnum_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP3]]
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP4]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst2q_vnum_s8u10__SVBool_tPKal10svint8x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP3]]
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[TMP0]], [[TMP1]], [[PG:%.*]], ptr [[TMP4]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2_t zt)
@@ -317,26 +261,22 @@ void test_svst2q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x2
// CHECK-LABEL: @test_svst2q_vnum_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u16u10__SVBool_tPKtl12svuint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x2_t zt)
@@ -346,26 +286,22 @@ void test_svst2q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuin
// CHECK-LABEL: @test_svst2q_vnum_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s16u10__SVBool_tPKsl11svint16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x2_t zt)
@@ -375,26 +311,22 @@ void test_svst2q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint1
// CHECK-LABEL: @test_svst2q_vnum_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u32u10__SVBool_tPKjl12svuint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x2_t zt)
@@ -404,26 +336,22 @@ void test_svst2q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuin
// CHECK-LABEL: @test_svst2q_vnum_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s32u10__SVBool_tPKil11svint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x2_t zt)
@@ -433,26 +361,22 @@ void test_svst2q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint3
// CHECK-LABEL: @test_svst2q_vnum_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_u64u10__SVBool_tPKml12svuint64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x2_t zt)
@@ -462,26 +386,22 @@ void test_svst2q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuin
// CHECK-LABEL: @test_svst2q_vnum_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_s64u10__SVBool_tPKll11svint64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x2_t zt)
@@ -491,26 +411,22 @@ void test_svst2q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint6
// CHECK-LABEL: @test_svst2q_vnum_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f16u10__SVBool_tPKDhl13svfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x2_t zt)
@@ -520,26 +436,22 @@ void test_svst2q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfl
// CHECK-LABEL: @test_svst2q_vnum_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z21test_svst2q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x2_t zt)
@@ -549,26 +461,22 @@ void test_svst2q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, sv
// CHECK-LABEL: @test_svst2q_vnum_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f32u10__SVBool_tPKfl13svfloat32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x2_t zt)
@@ -578,26 +486,22 @@ void test_svst2q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfl
// CHECK-LABEL: @test_svst2q_vnum_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst2q_vnum_f64u10__SVBool_tPKdl13svfloat64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st2q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x2_t zt)
@@ -609,18 +513,12 @@ void test_svst2q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl
// ST3Q
// CHECK-LABEL: @test_svst3q_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst3q_u8u10__SVBool_tPKh11svuint8x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt)
@@ -630,18 +528,12 @@ void test_svst3q_u8(svbool_t pg, const uint8_t *base, svuint8x3_t zt)
// CHECK-LABEL: @test_svst3q_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst3q_s8u10__SVBool_tPKa10svint8x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt)
@@ -650,20 +542,14 @@ void test_svst3q_s8(svbool_t pg, const int8_t *base, svint8x3_t zt)
}
// CHECK-LABEL: @test_svst3q_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u16u10__SVBool_tPKt12svuint16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt)
@@ -673,20 +559,14 @@ void test_svst3q_u16(svbool_t pg, const uint16_t *base, svuint16x3_t zt)
// CHECK-LABEL: @test_svst3q_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s16u10__SVBool_tPKs11svint16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt)
@@ -696,20 +576,14 @@ void test_svst3q_s16(svbool_t pg, const int16_t *base, svint16x3_t zt)
// CHECK-LABEL: @test_svst3q_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u32u10__SVBool_tPKj12svuint32x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt)
@@ -719,20 +593,14 @@ void test_svst3q_u32(svbool_t pg, const uint32_t *base, svuint32x3_t zt)
// CHECK-LABEL: @test_svst3q_s32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s32u10__SVBool_tPKi11svint32x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt)
@@ -742,20 +610,14 @@ void test_svst3q_s32(svbool_t pg, const int32_t *base, svint32x3_t zt)
// CHECK-LABEL: @test_svst3q_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_u64u10__SVBool_tPKm12svuint64x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt)
@@ -765,20 +627,14 @@ void test_svst3q_u64(svbool_t pg, const uint64_t *base, svuint64x3_t zt)
// CHECK-LABEL: @test_svst3q_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_s64u10__SVBool_tPKl11svint64x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt)
@@ -788,20 +644,14 @@ void test_svst3q_s64(svbool_t pg, const int64_t *base, svint64x3_t zt)
// CHECK-LABEL: @test_svst3q_f16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f16u10__SVBool_tPKDh13svfloat16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt)
@@ -811,20 +661,14 @@ void test_svst3q_f16(svbool_t pg, const float16_t *base, svfloat16x3_t zt)
// CHECK-LABEL: @test_svst3q_bf16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z16test_svst3q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt)
@@ -834,20 +678,14 @@ void test_svst3q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x3_t zt)
// CHECK-LABEL: @test_svst3q_f32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f32u10__SVBool_tPKf13svfloat32x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt)
@@ -857,20 +695,14 @@ void test_svst3q_f32(svbool_t pg, const float32_t *base, svfloat32x3_t zt)
// CHECK-LABEL: @test_svst3q_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst3q_f64u10__SVBool_tPKd13svfloat64x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt)
@@ -880,26 +712,20 @@ void test_svst3q_f64(svbool_t pg, const float64_t *base, svfloat64x3_t zt)
// CHECK-LABEL: @test_svst3q_vnum_u8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_u8u10__SVBool_tPKhl11svuint8x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x3_t zt)
@@ -909,26 +735,20 @@ void test_svst3q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8
// CHECK-LABEL: @test_svst3q_vnum_s8(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP5]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst3q_vnum_s8u10__SVBool_tPKal10svint8x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv48i8( [[ZT]], i64 32)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP4]]
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[PG:%.*]], ptr [[TMP5]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]]
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[PG:%.*]], ptr [[TMP2]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3_t zt)
@@ -938,28 +758,22 @@ void test_svst3q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x3
// CHECK-LABEL: @test_svst3q_vnum_u16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u16u10__SVBool_tPKtl12svuint16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x3_t zt)
@@ -969,28 +783,22 @@ void test_svst3q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuin
// CHECK-LABEL: @test_svst3q_vnum_s16(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4
-// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]]
-// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s16u10__SVBool_tPKsl11svint16x3_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT:%.*]], i64 0)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 8)
-// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv24i16( [[ZT]], i64 16)
-// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64()
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]]
-// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64()
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4
+// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]]
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x3_t zt)
@@ -1000,28 +808,22 @@ void test_svst3q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint1
// CHECK-LABEL: @test_svst3q_vnum_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8)
-// CHECK-NEXT: [[TMP3:%.*]]
= tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u32u10__SVBool_tPKjl12svuint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x3_t zt) @@ -1031,28 +833,22 @@ void test_svst3q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuin // CHECK-LABEL: @test_svst3q_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s32u10__SVBool_tPKil11svint32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv12i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x3_t zt) @@ -1062,28 +858,22 @@ void test_svst3q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint3 // CHECK-LABEL: @test_svst3q_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // 
CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_u64u10__SVBool_tPKml12svuint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x3_t zt) @@ -1093,28 +883,22 @@ void test_svst3q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuin // CHECK-LABEL: @test_svst3q_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_s64u10__SVBool_tPKll11svint64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv6i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) 
-// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x3_t zt) @@ -1124,28 +908,22 @@ void test_svst3q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint6 // CHECK-LABEL: @test_svst3q_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f16u10__SVBool_tPKDhl13svfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv24f16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x3_t zt) @@ -1155,28 +933,22 @@ void test_svst3q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfl // CHECK-LABEL: @test_svst3q_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst3q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv24bf16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], 
[[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x3_t zt) @@ -1186,28 +958,22 @@ void test_svst3q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, sv // CHECK-LABEL: @test_svst3q_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f32u10__SVBool_tPKfl13svfloat32x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv12f32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfloat32x3_t zt) @@ -1217,28 +983,22 @@ void test_svst3q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfl // CHECK-LABEL: @test_svst3q_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst3q_vnum_f64u10__SVBool_tPKdl13svfloat64x3_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv6f64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st3q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x3_t zt) @@ -1250,20 +1010,12 @@ void test_svst3q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfl // ST4Q // CHECK-LABEL: @test_svst4q_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], 
[[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4q_u8u10__SVBool_tPKh11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt) @@ -1273,20 +1025,12 @@ void test_svst4q_u8(svbool_t pg, const uint8_t *base, svuint8x4_t zt) // CHECK-LABEL: @test_svst4q_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z14test_svst4q_s8u10__SVBool_tPKa10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt) @@ -1295,22 +1039,14 @@ void test_svst4q_s8(svbool_t pg, const int8_t *base, svint8x4_t zt) } // CHECK-LABEL: @test_svst4q_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_u16u10__SVBool_tPKt12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt) @@ -1320,22 +1056,14 @@ void test_svst4q_u16(svbool_t pg, const uint16_t *base, svuint16x4_t zt) // CHECK-LABEL: @test_svst4q_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_s16u10__SVBool_tPKs11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_s16(svbool_t 
pg, const int16_t *base, svint16x4_t zt) @@ -1345,22 +1073,14 @@ void test_svst4q_s16(svbool_t pg, const int16_t *base, svint16x4_t zt) // CHECK-LABEL: @test_svst4q_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_u32u10__SVBool_tPKj12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt) @@ -1370,22 +1090,14 @@ void test_svst4q_u32(svbool_t pg, const uint32_t *base, svuint32x4_t zt) // CHECK-LABEL: @test_svst4q_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_s32u10__SVBool_tPKi11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt) @@ -1395,22 +1107,14 @@ void test_svst4q_s32(svbool_t pg, const int32_t *base, svint32x4_t zt) // CHECK-LABEL: @test_svst4q_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_u64u10__SVBool_tPKm12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt) @@ -1420,22 +1124,14 @@ void test_svst4q_u64(svbool_t pg, const uint64_t *base, svuint64x4_t zt) // CHECK-LABEL: @test_svst4q_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_s64u10__SVBool_tPKl11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt) @@ -1445,22 +1141,14 @@ void test_svst4q_s64(svbool_t pg, const int64_t *base, svint64x4_t zt) // CHECK-LABEL: @test_svst4q_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_f16u10__SVBool_tPKDh13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt) @@ -1470,22 +1158,14 @@ void test_svst4q_f16(svbool_t pg, const float16_t *base, svfloat16x4_t zt) // CHECK-LABEL: @test_svst4q_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z16test_svst4q_bf16u10__SVBool_tPKu6__bf1614svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt) @@ -1495,22 +1175,14 @@ void test_svst4q_bf16(svbool_t pg, const bfloat16_t *base, svbfloat16x4_t zt) // CHECK-LABEL: @test_svst4q_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], 
[[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_f32u10__SVBool_tPKf13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt) @@ -1520,22 +1192,14 @@ void test_svst4q_f32(svbool_t pg, const float32_t *base, svfloat32x4_t zt) // CHECK-LABEL: @test_svst4q_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z15test_svst4q_f64u10__SVBool_tPKd13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[BASE:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) @@ -1545,28 +1209,20 @@ void test_svst4q_f64(svbool_t pg, const float64_t *base, svfloat64x4_t zt) // CHECK-LABEL: @test_svst4q_vnum_u8( // CHECK-NEXT: entry: 
-// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_u8u10__SVBool_tPKhl11svuint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8x4_t zt) @@ -1576,28 +1232,20 @@ void test_svst4q_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum, svuint8 // CHECK-LABEL: @test_svst4q_vnum_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 
[[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z19test_svst4q_vnum_s8u10__SVBool_tPKal10svint8x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZT]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP5]] -// CPP-CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[PG:%.*]], ptr [[TMP6]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP1]] +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv16i8( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[PG:%.*]], ptr [[TMP2]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4_t zt) @@ -1607,30 +1255,22 @@ void test_svst4q_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum, svint8x4 // CHECK-LABEL: @test_svst4q_vnum_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u16u10__SVBool_tPKtl12svuint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuint16x4_t zt) @@ -1640,30 +1280,22 @@ void test_svst4q_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum, svuin // CHECK-LABEL: @test_svst4q_vnum_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z20test_svst4q_vnum_s16u10__SVBool_tPKsl11svint16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8i16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint16x4_t zt) @@ -1673,30 +1305,22 @@ void test_svst4q_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum, svint1 // CHECK-LABEL: @test_svst4q_vnum_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u32u10__SVBool_tPKjl12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuint32x4_t zt) @@ -1706,30 +1330,22 @@ void test_svst4q_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum, svuin // CHECK-LABEL: @test_svst4q_vnum_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s32u10__SVBool_tPKil11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( 
[[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4i32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint32x4_t zt) @@ -1739,30 +1355,22 @@ void test_svst4q_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum, svint3 // CHECK-LABEL: @test_svst4q_vnum_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_u64u10__SVBool_tPKml12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 
-// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuint64x4_t zt) @@ -1772,30 +1380,22 @@ void test_svst4q_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum, svuin // CHECK-LABEL: @test_svst4q_vnum_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_s64u10__SVBool_tPKll11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], 
[[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2i64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint64x4_t zt) @@ -1805,30 +1405,22 @@ void test_svst4q_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum, svint6 // CHECK-LABEL: @test_svst4q_vnum_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f16u10__SVBool_tPKDhl13svfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8f16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfloat16x4_t zt) @@ -1838,30 +1430,22 @@ void test_svst4q_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum, svfl // CHECK-LABEL: @test_svst4q_vnum_bf16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z21test_svst4q_vnum_bf16u10__SVBool_tPKu6__bf16l14svbfloat16x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZT]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sve.st4q.nxv8bf16( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, svbfloat16x4_t zt) @@ -1871,30 +1455,22 @@ void test_svst4q_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum, sv // CHECK-LABEL: @test_svst4q_vnum_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f32u10__SVBool_tPKfl13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZT]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv4f32( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_f32(svbool_t pg, const 
float32_t *base, int64_t vnum, svfloat32x4_t zt) @@ -1904,30 +1480,22 @@ void test_svst4q_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum, svfl // CHECK-LABEL: @test_svst4q_vnum_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z20test_svst4q_vnum_f64u10__SVBool_tPKdl13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZT]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.vscale.i64() -// CPP-CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4 -// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP6]] -// CPP-CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], ptr [[TMP7]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.vscale.i64() +// CPP-CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[VNUM:%.*]], [[TMP2]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st4q.nxv2f64( [[ZT_COERCE0:%.*]], [[ZT_COERCE1:%.*]], [[ZT_COERCE2:%.*]], [[ZT_COERCE3:%.*]], [[TMP0]], ptr [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svst4q_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum, svfloat64x4_t zt) diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c index 1675b1c01252ec..55defc45c54309 100644 
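For context on the next file's checks, a minimal usage sketch of the svundef tuple builtins it exercises (illustrative only; the wrapper name is hypothetical and the sketch assumes <arm_sve.h> with SVE2.1 available). With the struct-based tuple lowering, an svboolx2_t result is returned as a struct of two <vscale x 16 x i1> predicates instead of one concatenated <vscale x 32 x i1> vector:

#include <arm_sve.h>

// Hypothetical wrapper: returns an uninitialized predicate pair.
// Per the checks below, this now compiles to
//   ret { <vscale x 16 x i1>, <vscale x 16 x i1> } undef
svboolx2_t make_undef_pair(void) {
  return svundef2_b();
}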
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c
@@ -15,27 +15,29 @@
 #define MODE_ATTR
 #endif
 
-// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svundef2_b(
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z15test_svundef2_bv(
-//
+// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svundef2_b(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 32 x i1> undef
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } undef
 //
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z15test_svundef2_bv(
+// CPP-CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } undef
 //
 svboolx2_t test_svundef2_b(void) MODE_ATTR {
   return svundef2_b();
 }
 
-// CHECK-LABEL: define dso_local <vscale x 64 x i1> @test_svundef4_b(
-// CPP-CHECK-LABEL: define dso_local <vscale x 64 x i1> @_Z15test_svundef4_bv(
-//
+// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svundef4_b(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT: entry:
-// CHECK-NEXT: ret <vscale x 64 x i1> undef
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } undef
 //
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z15test_svundef4_bv(
+// CPP-CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: ret <vscale x 64 x i1> undef
+// CPP-CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } undef
 //
 svboolx4_t test_svundef4_b(void) MODE_ATTR {
   return svundef4_b();
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
index 2771d07df0e4c5..3fcc1dc6c819a4 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_x2.c
@@ -20,57 +20,70 @@
 #define ATTR
 #endif
 
-// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilege_b8_s64(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilege_b8_s64(
 // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
-// CHECK-NEXT: ret <vscale x 32 x i1> [[TMP4]]
+// CHECK-NEXT: store <vscale x 32 x i1> [[TMP4]], ptr [[RETVAL]], align 2
+// CHECK-NEXT: [[TMP5:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
+// CHECK-NEXT: ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP5]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilege_b8_s64ll(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z21test_svwhilege_b8_s64ll(
 // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0:[0-9]+]] {
 // CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT: ret <vscale x 32 x i1> [[TMP4]]
+// CPP-CHECK-NEXT: store <vscale x 32 x i1> [[TMP4]], ptr
[[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svboolx2_t test_svwhilege_b8_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_s64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b8_u64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b8_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilege_b8_u64mm( +// CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilege_b8_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP4]] +// CPP-CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP5]] // svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b8,_u64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b16_s64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -78,11 +91,14 @@ svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b16_s64ll( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) 
#[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -90,15 +106,18 @@ svboolx2_t test_svwhilege_b8_u64(uint64_t op1, uint64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_s64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b16_u64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b16_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -106,11 +125,14 @@ svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b16_u64mm( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b16_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP1]]) @@ -118,15 +140,18 @@ svboolx2_t test_svwhilege_b16_s64(int64_t op1, int64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b16,_u64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b32_s64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_s64( // 
CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -134,11 +159,14 @@ svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b32_s64ll( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -146,15 +174,18 @@ svboolx2_t test_svwhilege_b16_u64(uint64_t op1, uint64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_s64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b32_u64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b32_u64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -162,11 +193,14 @@ svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b32_u64mm( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b32_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP1]]) @@ -174,15 +208,18 @@ svboolx2_t test_svwhilege_b32_s64(int64_t op1, int64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b32,_u64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b64_s64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) @@ -190,11 +227,14 @@ svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b64_s64ll( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) @@ -202,15 +242,18 @@ svboolx2_t test_svwhilege_b32_u64(uint64_t op1, uint64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_s64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilege_b64_u64( +// CHECK-LABEL: define dso_local { , } @test_svwhilege_b64_u64( // CHECK-SAME: 
i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) @@ -218,11 +261,14 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] +// CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP7]] // -// CPP-CHECK-LABEL: define dso_local @_Z22test_svwhilege_b64_u64mm( +// CPP-CHECK-LABEL: define dso_local { , } @_Z22test_svwhilege_b64_u64mm( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 [[OP1]], i64 [[OP2]]) // CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP1]]) @@ -230,63 +276,78 @@ svboolx2_t test_svwhilege_b64_s64(int64_t op1, int64_t op2) ATTR { // CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP0]], 1 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP4]]) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP3]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] +// CPP-CHECK-NEXT: store [[TMP6]], ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CPP-CHECK-NEXT: ret { , } [[TMP7]] // svboolx2_t test_svwhilege_b64_u64(uint64_t op1, uint64_t op2) ATTR { return SVE_ACLE_FUNC(svwhilege_b64,_u64,_x2)(op1, op2); } -// CHECK-LABEL: define dso_local @test_svwhilegt_b8_s64( +// CHECK-LABEL: define dso_local { , } @test_svwhilegt_b8_s64( // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, [[TMP1]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i1.nxv16i1( [[TMP2]], [[TMP3]], i64 16) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: store [[TMP4]], ptr [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP5:%.*]] = load { , }, ptr [[RETVAL]], align 2 +// CHECK-NEXT: ret { , } [[TMP5]] // -// CPP-CHECK-LABEL: define dso_local @_Z21test_svwhilegt_b8_s64ll( +// CPP-CHECK-LABEL: define dso_local { , } @_Z21test_svwhilegt_b8_s64ll( // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[RETVAL:%.*]] = alloca { , }, align 2 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]]) // 
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
-// CPP-CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+// CPP-CHECK-NEXT:    store <vscale x 32 x i1> [[TMP4]], ptr [[RETVAL]], align 2
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
+// CPP-CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP5]]
 //
 svboolx2_t test_svwhilegt_b8_s64(int64_t op1, int64_t op2) ATTR {
   return SVE_ACLE_FUNC(svwhilegt_b8,_s64,_x2)(op1, op2);
 }
-// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b8_u64(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilegt_b8_u64(
 // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 0
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> [[TMP1]], i64 0)
 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], 1
 // CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP4]]
+// CHECK-NEXT:    store <vscale x 32 x i1> [[TMP4]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP5:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP5]]
 //
-// CPP-CHECK-LABEL: define dso_local <vscale x 32 x i1> @_Z21test_svwhilegt_b8_u64mm(
+// CPP-CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z21test_svwhilegt_b8_u64mm(
 // CPP-CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 [[OP1]], i64 [[OP2]])
[...the CPP-CHECK body mirrors the CHECK block above, with the same store/load/ret-of-struct rewrite...]
 //
 svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR {
   return SVE_ACLE_FUNC(svwhilegt_b8,_u64,_x2)(op1, op2);
 }
-// CHECK-LABEL: define dso_local <vscale x 32 x i1> @test_svwhilegt_b16_s64(
+// CHECK-LABEL: define dso_local { <vscale x 16 x i1>, <vscale x 16 x i1> } @test_svwhilegt_b16_s64(
 // CHECK-SAME: i64 noundef [[OP1:%.*]], i64 noundef [[OP2:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 [[OP1]], i64 [[OP2]])
 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 0
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
@@ -294,11 +355,14 @@ svboolx2_t test_svwhilegt_b8_u64(uint64_t op1, uint64_t op2) ATTR {
 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } [[TMP0]], 1
 // CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP4]])
 // CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]], i64 16)
-// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP6]]
+// CHECK-NEXT:    store <vscale x 32 x i1> [[TMP6]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP7:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP7]]
 //
[...the matching CPP-CHECK block and the analogous regenerated CHECK/CPP-CHECK diffs for the remaining test_svwhile{gt,le,lt}_b{8,16,32,64}_{s64,u64} functions follow the same pattern: each label changes from returning <vscale x 32 x i1> to { <vscale x 16 x i1>, <vscale x 16 x i1> }, gains a [[RETVAL]] alloca on entry, and replaces the final ret with a store of the concatenated predicate plus a load/ret of the two-element struct; the b8 variants use the whilegt/whilehi, whilele/whilels, whilelt/whilelo x2 intrinsics at nxv16i1 directly, while the b16/b32/b64 variants go through convert.to.svbool at nxv8i1/nxv4i1/nxv2i1...]
 svboolx2_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) ATTR {
   return SVE_ACLE_FUNC(svwhilelt_b64,_u64,_x2)(op1, op2);
diff --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
index 94657cec86cd6b..87f0a9e69c0b68 100644
--- a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
+++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
@@ -70,6 +70,7 @@ int64x2_t test_svget_neonq_s64(svint64_t n) {
   return SVE_ACLE_FUNC(svget_neonq, _s64, , )(n);
 }
 
+//
 // CHECK-LABEL: @test_svget_neonq_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
@@ -154,7 +155,6 @@ float32x4_t test_svget_neonq_f32(svfloat32_t n) {
   return SVE_ACLE_FUNC(svget_neonq, _f32, , )(n);
 }
 
-//
 // CHECK-LABEL: @test_svget_neonq_f64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
diff --git a/clang/test/CodeGen/svboolx2_t.cpp b/clang/test/CodeGen/svboolx2_t.cpp
index 069d4f1fc46c89..0b798afb8c642f 100644
--- a/clang/test/CodeGen/svboolx2_t.cpp
+++ b/clang/test/CodeGen/svboolx2_t.cpp
@@ -3,29 +3,44 @@
 // CHECK-LABEL: @_Z3foo10svboolx2_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca <vscale x 32 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 32 x i1> [[ARG:%.*]], ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 32 x i1>, ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    ret <vscale x 32 x i1> [[TMP0]]
+// CHECK-NEXT:    [[ARG:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[ARG_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[ARG_COERCE1:%.*]], 1
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], ptr [[ARG]], align 2
+// CHECK-NEXT:    [[ARG1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[ARG1]], ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]]
 //
 __clang_svboolx2_t foo(__clang_svboolx2_t arg) { return arg; }
 
 __clang_svboolx2_t bar();
 // CHECK-LABEL: @_Z4foo2v(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 32 x i1> @_Z3barv()
-// CHECK-NEXT:    ret <vscale x 32 x i1> [[CALL]]
+// CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z3barv()
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[CALL]]
 //
 __clang_svboolx2_t foo2() { return bar(); }
 
 __clang_svboolx2_t bar2(__clang_svboolx2_t);
 // CHECK-LABEL: @_Z4foo310svboolx2_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca <vscale x 32 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 32 x i1> [[ARG:%.*]], ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 32 x i1>, ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 32 x i1> @_Z4bar210svboolx2_t(<vscale x 32 x i1> [[TMP0]])
-// CHECK-NEXT:    ret <vscale x 32 x i1> [[CALL]]
+// CHECK-NEXT:    [[ARG:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[COERCE:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[ARG_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[ARG_COERCE1:%.*]], 1
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], ptr [[ARG]], align 2
+// CHECK-NEXT:    [[ARG1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[ARG1]], ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], ptr [[COERCE]], align 2
+// CHECK-NEXT:    [[COERCE_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE]], align 2
+// CHECK-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 0
+// CHECK-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 1
+// CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z4bar210svboolx2_t(<vscale x 16 x i1> [[COERCE_EXTRACT0]], <vscale x 16 x i1> [[COERCE_EXTRACT1]])
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1> } [[CALL]]
 //
 __clang_svboolx2_t foo3(__clang_svboolx2_t arg) { return bar2(arg); }
diff --git a/clang/test/CodeGen/svboolx4_t.cpp b/clang/test/CodeGen/svboolx4_t.cpp
index ef20dc0302c2a1..d849896bad85f6 100644
--- a/clang/test/CodeGen/svboolx4_t.cpp
+++ b/clang/test/CodeGen/svboolx4_t.cpp
@@ -3,29 +3,50 @@
 // CHECK-LABEL: @_Z3foo10svboolx4_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 64 x i1> [[ARG:%.*]], ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[TMP0]]
+// CHECK-NEXT:    [[ARG:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[ARG_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[ARG_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[ARG_COERCE2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[ARG_COERCE3:%.*]], 3
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], ptr [[ARG]], align 2
+// CHECK-NEXT:    [[ARG1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[ARG1]], ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP4]]
 //
 __clang_svboolx4_t foo(__clang_svboolx4_t arg) { return arg; }
 
 __clang_svboolx4_t bar();
 // CHECK-LABEL: @_Z4foo2v(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 64 x i1> @_Z3barv()
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[CALL]]
+// CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z3barv()
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[CALL]]
 //
 __clang_svboolx4_t foo2() { return bar(); }
 
 __clang_svboolx4_t bar2(__clang_svboolx4_t);
 // CHECK-LABEL: @_Z4foo310svboolx4_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 64 x i1> [[ARG:%.*]], ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[ARG_ADDR]], align 2
-// CHECK-NEXT:    [[CALL:%.*]] = call <vscale x 64 x i1> @_Z4bar210svboolx4_t(<vscale x 64 x i1> [[TMP0]])
-// CHECK-NEXT:    ret <vscale x 64 x i1> [[CALL]]
+// CHECK-NEXT:    [[ARG:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[ARG_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[COERCE:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[ARG_COERCE0:%.*]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[ARG_COERCE1:%.*]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[ARG_COERCE2:%.*]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[ARG_COERCE3:%.*]], 3
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], ptr [[ARG]], align 2
+// CHECK-NEXT:    [[ARG1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[ARG1]], ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[ARG_ADDR]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP4]], ptr [[COERCE]], align 2
+// CHECK-NEXT:    [[COERCE_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE]], align 2
+// CHECK-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 0
+// CHECK-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 1
+// CHECK-NEXT:    [[COERCE_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 2
+// CHECK-NEXT:    [[COERCE_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE_TUPLE]], 3
+// CHECK-NEXT:    [[CALL:%.*]] = call { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } @_Z4bar210svboolx4_t(<vscale x 16 x i1> [[COERCE_EXTRACT0]], <vscale x 16 x i1> [[COERCE_EXTRACT1]], <vscale x 16 x i1> [[COERCE_EXTRACT2]], <vscale x 16 x i1> [[COERCE_EXTRACT3]])
+// CHECK-NEXT:    ret { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[CALL]]
 //
 __clang_svboolx4_t foo3(__clang_svboolx4_t arg) { return bar2(arg); }
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
index 752b2beca3881b..dfe31ff2ce25fb 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
@@ -62,6 +62,82 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t);
 // CHECK-LABEL: define dso_local void @_Z3foov(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[COERCE:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[COERCE1:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[COERCE2:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[COERCE3:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[COERCE4:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[COERCE5:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
[...seventy further allocas, [[COERCE6]] through [[COERCE71]]: x2/x3/x4 struct pairs for each of the svint16/32/64, svuint8/16/32/64, svfloat16/32/64 and svbfloat16 tuple types at align 16, followed by [[COERCE72]]-[[COERCE75]] holding the svboolx2_t/svboolx4_t structs of <vscale x 16 x i1> at align 2...]
 // CHECK-NEXT:    call void @_Z1fu10__SVInt8_tS_(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer)
 // CHECK-NEXT:    call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
 // CHECK-NEXT:    call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
@@ -77,49 +153,505 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t);
 // CHECK-NEXT:    call void @_Z1fu14__SVBfloat16_tS_(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer)
 // CHECK-NEXT:    call void @_Z1fu10__SVBool_tS_(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i1> zeroinitializer)
 // CHECK-NEXT:    call void @_Z1fu11__SVCount_tS_(target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer)
-// CHECK-NEXT:    call void @_Z1f10svint8x2_tS_(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer)
-// CHECK-NEXT:    call void @_Z1f10svint8x3_tS_(<vscale x 48 x i8> zeroinitializer, <vscale x 48 x i8> zeroinitializer)
-// CHECK-NEXT:    call void @_Z1f10svint8x4_tS_(<vscale x 64 x i8> zeroinitializer, <vscale x 64 x i8> zeroinitializer)
[...the remaining deleted calls pass each svint/svuint/svfloat/svbfloat x2/x3/x4 tuple as one wide concatenated vector, ending with...]
-// CHECK-NEXT:    call void @_Z1f10svboolx2_tS_(<vscale x 32 x i1> zeroinitializer, <vscale x 32 x i1> zeroinitializer)
-// CHECK-NEXT:    call void @_Z1f10svboolx4_tS_(<vscale x 64 x i1> zeroinitializer, <vscale x 64 x i1> zeroinitializer)
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE]], align 16
+// CHECK-NEXT:    [[COERCE_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE]], align 16
+// CHECK-NEXT:    [[COERCE_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE_TUPLE]], 0
+// CHECK-NEXT:    [[COERCE_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE_TUPLE]], 1
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE1]], align 16
+// CHECK-NEXT:    [[COERCE1_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE1]], align 16
+// CHECK-NEXT:    [[COERCE1_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE1_TUPLE]], 0
+// CHECK-NEXT:    [[COERCE1_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE1_TUPLE]], 1
+// CHECK-NEXT:    call void @_Z1f10svint8x2_tS_(<vscale x 16 x i8> [[COERCE_EXTRACT0]], <vscale x 16 x i8> [[COERCE_EXTRACT1]], <vscale x 16 x i8> [[COERCE1_EXTRACT0]], <vscale x 16 x i8> [[COERCE1_EXTRACT1]])
[...the same store/load/extractvalue/call expansion is regenerated for each remaining tuple call ([[COERCE2]]/[[COERCE3]] for svint8x3_t, [[COERCE4]]/[[COERCE5]] for svint8x4_t, [[COERCE6]]/[[COERCE7]] for svint16x2_t, [[COERCE8]]/[[COERCE9]] for svint16x3_t, and so on); the section ends partway through the svint16x4_t sequence, at the [[COERCE10_EXTRACT2]] extractvalue...]
extractvalue { , , , } [[COERCE10_TUPLE]], 2 +// CHECK-NEXT: [[COERCE10_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE10_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE11]], align 16 +// CHECK-NEXT: [[COERCE11_TUPLE:%.*]] = load { , , , }, ptr [[COERCE11]], align 16 +// CHECK-NEXT: [[COERCE11_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 0 +// CHECK-NEXT: [[COERCE11_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 1 +// CHECK-NEXT: [[COERCE11_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 2 +// CHECK-NEXT: [[COERCE11_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f11svint16x4_tS_( [[COERCE10_EXTRACT0]], [[COERCE10_EXTRACT1]], [[COERCE10_EXTRACT2]], [[COERCE10_EXTRACT3]], [[COERCE11_EXTRACT0]], [[COERCE11_EXTRACT1]], [[COERCE11_EXTRACT2]], [[COERCE11_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE12]], align 16 +// CHECK-NEXT: [[COERCE12_TUPLE:%.*]] = load { , }, ptr [[COERCE12]], align 16 +// CHECK-NEXT: [[COERCE12_EXTRACT0:%.*]] = extractvalue { , } [[COERCE12_TUPLE]], 0 +// CHECK-NEXT: [[COERCE12_EXTRACT1:%.*]] = extractvalue { , } [[COERCE12_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE13]], align 16 +// CHECK-NEXT: [[COERCE13_TUPLE:%.*]] = load { , }, ptr [[COERCE13]], align 16 +// CHECK-NEXT: [[COERCE13_EXTRACT0:%.*]] = extractvalue { , } [[COERCE13_TUPLE]], 0 +// CHECK-NEXT: [[COERCE13_EXTRACT1:%.*]] = extractvalue { , } [[COERCE13_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f11svint32x2_tS_( [[COERCE12_EXTRACT0]], [[COERCE12_EXTRACT1]], [[COERCE13_EXTRACT0]], [[COERCE13_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE14]], align 16 +// CHECK-NEXT: [[COERCE14_TUPLE:%.*]] = load { , , }, ptr [[COERCE14]], align 16 +// CHECK-NEXT: [[COERCE14_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 0 +// CHECK-NEXT: [[COERCE14_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 1 +// CHECK-NEXT: [[COERCE14_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE15]], align 16 +// CHECK-NEXT: [[COERCE15_TUPLE:%.*]] = load { , , }, ptr [[COERCE15]], align 16 +// CHECK-NEXT: [[COERCE15_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 0 +// CHECK-NEXT: [[COERCE15_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 1 +// CHECK-NEXT: [[COERCE15_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f11svint32x3_tS_( [[COERCE14_EXTRACT0]], [[COERCE14_EXTRACT1]], [[COERCE14_EXTRACT2]], [[COERCE15_EXTRACT0]], [[COERCE15_EXTRACT1]], [[COERCE15_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE16]], align 16 +// CHECK-NEXT: [[COERCE16_TUPLE:%.*]] = load { , , , }, ptr [[COERCE16]], align 16 +// CHECK-NEXT: [[COERCE16_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 0 +// CHECK-NEXT: [[COERCE16_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 1 +// CHECK-NEXT: [[COERCE16_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 2 +// CHECK-NEXT: [[COERCE16_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE17]], align 16 +// CHECK-NEXT: [[COERCE17_TUPLE:%.*]] = load { , , , }, ptr [[COERCE17]], align 16 +// CHECK-NEXT: [[COERCE17_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 0 +// CHECK-NEXT: [[COERCE17_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 1 +// 
CHECK-NEXT: [[COERCE17_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 2 +// CHECK-NEXT: [[COERCE17_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f11svint32x4_tS_( [[COERCE16_EXTRACT0]], [[COERCE16_EXTRACT1]], [[COERCE16_EXTRACT2]], [[COERCE16_EXTRACT3]], [[COERCE17_EXTRACT0]], [[COERCE17_EXTRACT1]], [[COERCE17_EXTRACT2]], [[COERCE17_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE18]], align 16 +// CHECK-NEXT: [[COERCE18_TUPLE:%.*]] = load { , }, ptr [[COERCE18]], align 16 +// CHECK-NEXT: [[COERCE18_EXTRACT0:%.*]] = extractvalue { , } [[COERCE18_TUPLE]], 0 +// CHECK-NEXT: [[COERCE18_EXTRACT1:%.*]] = extractvalue { , } [[COERCE18_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE19]], align 16 +// CHECK-NEXT: [[COERCE19_TUPLE:%.*]] = load { , }, ptr [[COERCE19]], align 16 +// CHECK-NEXT: [[COERCE19_EXTRACT0:%.*]] = extractvalue { , } [[COERCE19_TUPLE]], 0 +// CHECK-NEXT: [[COERCE19_EXTRACT1:%.*]] = extractvalue { , } [[COERCE19_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f11svint64x2_tS_( [[COERCE18_EXTRACT0]], [[COERCE18_EXTRACT1]], [[COERCE19_EXTRACT0]], [[COERCE19_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE20]], align 16 +// CHECK-NEXT: [[COERCE20_TUPLE:%.*]] = load { , , }, ptr [[COERCE20]], align 16 +// CHECK-NEXT: [[COERCE20_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 0 +// CHECK-NEXT: [[COERCE20_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 1 +// CHECK-NEXT: [[COERCE20_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE21]], align 16 +// CHECK-NEXT: [[COERCE21_TUPLE:%.*]] = load { , , }, ptr [[COERCE21]], align 16 +// CHECK-NEXT: [[COERCE21_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 0 +// CHECK-NEXT: [[COERCE21_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 1 +// CHECK-NEXT: [[COERCE21_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f11svint64x3_tS_( [[COERCE20_EXTRACT0]], [[COERCE20_EXTRACT1]], [[COERCE20_EXTRACT2]], [[COERCE21_EXTRACT0]], [[COERCE21_EXTRACT1]], [[COERCE21_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE22]], align 16 +// CHECK-NEXT: [[COERCE22_TUPLE:%.*]] = load { , , , }, ptr [[COERCE22]], align 16 +// CHECK-NEXT: [[COERCE22_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 0 +// CHECK-NEXT: [[COERCE22_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 1 +// CHECK-NEXT: [[COERCE22_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 2 +// CHECK-NEXT: [[COERCE22_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE23]], align 16 +// CHECK-NEXT: [[COERCE23_TUPLE:%.*]] = load { , , , }, ptr [[COERCE23]], align 16 +// CHECK-NEXT: [[COERCE23_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 0 +// CHECK-NEXT: [[COERCE23_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 1 +// CHECK-NEXT: [[COERCE23_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 2 +// CHECK-NEXT: [[COERCE23_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f11svint64x4_tS_( [[COERCE22_EXTRACT0]], [[COERCE22_EXTRACT1]], [[COERCE22_EXTRACT2]], [[COERCE22_EXTRACT3]], [[COERCE23_EXTRACT0]], [[COERCE23_EXTRACT1]], [[COERCE23_EXTRACT2]], [[COERCE23_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE24]], align 16 +// 
CHECK-NEXT: [[COERCE24_TUPLE:%.*]] = load { , }, ptr [[COERCE24]], align 16 +// CHECK-NEXT: [[COERCE24_EXTRACT0:%.*]] = extractvalue { , } [[COERCE24_TUPLE]], 0 +// CHECK-NEXT: [[COERCE24_EXTRACT1:%.*]] = extractvalue { , } [[COERCE24_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE25]], align 16 +// CHECK-NEXT: [[COERCE25_TUPLE:%.*]] = load { , }, ptr [[COERCE25]], align 16 +// CHECK-NEXT: [[COERCE25_EXTRACT0:%.*]] = extractvalue { , } [[COERCE25_TUPLE]], 0 +// CHECK-NEXT: [[COERCE25_EXTRACT1:%.*]] = extractvalue { , } [[COERCE25_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f11svuint8x2_tS_( [[COERCE24_EXTRACT0]], [[COERCE24_EXTRACT1]], [[COERCE25_EXTRACT0]], [[COERCE25_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE26]], align 16 +// CHECK-NEXT: [[COERCE26_TUPLE:%.*]] = load { , , }, ptr [[COERCE26]], align 16 +// CHECK-NEXT: [[COERCE26_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 0 +// CHECK-NEXT: [[COERCE26_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 1 +// CHECK-NEXT: [[COERCE26_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE27]], align 16 +// CHECK-NEXT: [[COERCE27_TUPLE:%.*]] = load { , , }, ptr [[COERCE27]], align 16 +// CHECK-NEXT: [[COERCE27_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 0 +// CHECK-NEXT: [[COERCE27_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 1 +// CHECK-NEXT: [[COERCE27_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f11svuint8x3_tS_( [[COERCE26_EXTRACT0]], [[COERCE26_EXTRACT1]], [[COERCE26_EXTRACT2]], [[COERCE27_EXTRACT0]], [[COERCE27_EXTRACT1]], [[COERCE27_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE28]], align 16 +// CHECK-NEXT: [[COERCE28_TUPLE:%.*]] = load { , , , }, ptr [[COERCE28]], align 16 +// CHECK-NEXT: [[COERCE28_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 0 +// CHECK-NEXT: [[COERCE28_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 1 +// CHECK-NEXT: [[COERCE28_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 2 +// CHECK-NEXT: [[COERCE28_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE29]], align 16 +// CHECK-NEXT: [[COERCE29_TUPLE:%.*]] = load { , , , }, ptr [[COERCE29]], align 16 +// CHECK-NEXT: [[COERCE29_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 0 +// CHECK-NEXT: [[COERCE29_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 1 +// CHECK-NEXT: [[COERCE29_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 2 +// CHECK-NEXT: [[COERCE29_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f11svuint8x4_tS_( [[COERCE28_EXTRACT0]], [[COERCE28_EXTRACT1]], [[COERCE28_EXTRACT2]], [[COERCE28_EXTRACT3]], [[COERCE29_EXTRACT0]], [[COERCE29_EXTRACT1]], [[COERCE29_EXTRACT2]], [[COERCE29_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE30]], align 16 +// CHECK-NEXT: [[COERCE30_TUPLE:%.*]] = load { , }, ptr [[COERCE30]], align 16 +// CHECK-NEXT: [[COERCE30_EXTRACT0:%.*]] = extractvalue { , } [[COERCE30_TUPLE]], 0 +// CHECK-NEXT: [[COERCE30_EXTRACT1:%.*]] = extractvalue { , } [[COERCE30_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE31]], align 16 +// CHECK-NEXT: [[COERCE31_TUPLE:%.*]] = load { , }, ptr [[COERCE31]], align 16 +// CHECK-NEXT: [[COERCE31_EXTRACT0:%.*]] = extractvalue { , } [[COERCE31_TUPLE]], 0 +// 
CHECK-NEXT: [[COERCE31_EXTRACT1:%.*]] = extractvalue { , } [[COERCE31_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f12svuint16x2_tS_( [[COERCE30_EXTRACT0]], [[COERCE30_EXTRACT1]], [[COERCE31_EXTRACT0]], [[COERCE31_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE32]], align 16 +// CHECK-NEXT: [[COERCE32_TUPLE:%.*]] = load { , , }, ptr [[COERCE32]], align 16 +// CHECK-NEXT: [[COERCE32_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 0 +// CHECK-NEXT: [[COERCE32_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 1 +// CHECK-NEXT: [[COERCE32_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE33]], align 16 +// CHECK-NEXT: [[COERCE33_TUPLE:%.*]] = load { , , }, ptr [[COERCE33]], align 16 +// CHECK-NEXT: [[COERCE33_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 0 +// CHECK-NEXT: [[COERCE33_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 1 +// CHECK-NEXT: [[COERCE33_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f12svuint16x3_tS_( [[COERCE32_EXTRACT0]], [[COERCE32_EXTRACT1]], [[COERCE32_EXTRACT2]], [[COERCE33_EXTRACT0]], [[COERCE33_EXTRACT1]], [[COERCE33_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE34]], align 16 +// CHECK-NEXT: [[COERCE34_TUPLE:%.*]] = load { , , , }, ptr [[COERCE34]], align 16 +// CHECK-NEXT: [[COERCE34_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 0 +// CHECK-NEXT: [[COERCE34_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 1 +// CHECK-NEXT: [[COERCE34_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 2 +// CHECK-NEXT: [[COERCE34_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE35]], align 16 +// CHECK-NEXT: [[COERCE35_TUPLE:%.*]] = load { , , , }, ptr [[COERCE35]], align 16 +// CHECK-NEXT: [[COERCE35_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 0 +// CHECK-NEXT: [[COERCE35_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 1 +// CHECK-NEXT: [[COERCE35_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 2 +// CHECK-NEXT: [[COERCE35_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f12svuint16x4_tS_( [[COERCE34_EXTRACT0]], [[COERCE34_EXTRACT1]], [[COERCE34_EXTRACT2]], [[COERCE34_EXTRACT3]], [[COERCE35_EXTRACT0]], [[COERCE35_EXTRACT1]], [[COERCE35_EXTRACT2]], [[COERCE35_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE36]], align 16 +// CHECK-NEXT: [[COERCE36_TUPLE:%.*]] = load { , }, ptr [[COERCE36]], align 16 +// CHECK-NEXT: [[COERCE36_EXTRACT0:%.*]] = extractvalue { , } [[COERCE36_TUPLE]], 0 +// CHECK-NEXT: [[COERCE36_EXTRACT1:%.*]] = extractvalue { , } [[COERCE36_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE37]], align 16 +// CHECK-NEXT: [[COERCE37_TUPLE:%.*]] = load { , }, ptr [[COERCE37]], align 16 +// CHECK-NEXT: [[COERCE37_EXTRACT0:%.*]] = extractvalue { , } [[COERCE37_TUPLE]], 0 +// CHECK-NEXT: [[COERCE37_EXTRACT1:%.*]] = extractvalue { , } [[COERCE37_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f12svuint32x2_tS_( [[COERCE36_EXTRACT0]], [[COERCE36_EXTRACT1]], [[COERCE37_EXTRACT0]], [[COERCE37_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE38]], align 16 +// CHECK-NEXT: [[COERCE38_TUPLE:%.*]] = load { , , }, ptr [[COERCE38]], align 16 +// CHECK-NEXT: [[COERCE38_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 0 +// CHECK-NEXT: 
[[COERCE38_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 1 +// CHECK-NEXT: [[COERCE38_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE39]], align 16 +// CHECK-NEXT: [[COERCE39_TUPLE:%.*]] = load { , , }, ptr [[COERCE39]], align 16 +// CHECK-NEXT: [[COERCE39_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE39_TUPLE]], 0 +// CHECK-NEXT: [[COERCE39_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE39_TUPLE]], 1 +// CHECK-NEXT: [[COERCE39_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE39_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f12svuint32x3_tS_( [[COERCE38_EXTRACT0]], [[COERCE38_EXTRACT1]], [[COERCE38_EXTRACT2]], [[COERCE39_EXTRACT0]], [[COERCE39_EXTRACT1]], [[COERCE39_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE40]], align 16 +// CHECK-NEXT: [[COERCE40_TUPLE:%.*]] = load { , , , }, ptr [[COERCE40]], align 16 +// CHECK-NEXT: [[COERCE40_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 0 +// CHECK-NEXT: [[COERCE40_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 1 +// CHECK-NEXT: [[COERCE40_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 2 +// CHECK-NEXT: [[COERCE40_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE41]], align 16 +// CHECK-NEXT: [[COERCE41_TUPLE:%.*]] = load { , , , }, ptr [[COERCE41]], align 16 +// CHECK-NEXT: [[COERCE41_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 0 +// CHECK-NEXT: [[COERCE41_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 1 +// CHECK-NEXT: [[COERCE41_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 2 +// CHECK-NEXT: [[COERCE41_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f12svuint32x4_tS_( [[COERCE40_EXTRACT0]], [[COERCE40_EXTRACT1]], [[COERCE40_EXTRACT2]], [[COERCE40_EXTRACT3]], [[COERCE41_EXTRACT0]], [[COERCE41_EXTRACT1]], [[COERCE41_EXTRACT2]], [[COERCE41_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE42]], align 16 +// CHECK-NEXT: [[COERCE42_TUPLE:%.*]] = load { , }, ptr [[COERCE42]], align 16 +// CHECK-NEXT: [[COERCE42_EXTRACT0:%.*]] = extractvalue { , } [[COERCE42_TUPLE]], 0 +// CHECK-NEXT: [[COERCE42_EXTRACT1:%.*]] = extractvalue { , } [[COERCE42_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE43]], align 16 +// CHECK-NEXT: [[COERCE43_TUPLE:%.*]] = load { , }, ptr [[COERCE43]], align 16 +// CHECK-NEXT: [[COERCE43_EXTRACT0:%.*]] = extractvalue { , } [[COERCE43_TUPLE]], 0 +// CHECK-NEXT: [[COERCE43_EXTRACT1:%.*]] = extractvalue { , } [[COERCE43_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f12svuint64x2_tS_( [[COERCE42_EXTRACT0]], [[COERCE42_EXTRACT1]], [[COERCE43_EXTRACT0]], [[COERCE43_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE44]], align 16 +// CHECK-NEXT: [[COERCE44_TUPLE:%.*]] = load { , , }, ptr [[COERCE44]], align 16 +// CHECK-NEXT: [[COERCE44_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 0 +// CHECK-NEXT: [[COERCE44_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 1 +// CHECK-NEXT: [[COERCE44_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE45]], align 16 +// CHECK-NEXT: [[COERCE45_TUPLE:%.*]] = load { , , }, ptr [[COERCE45]], align 16 +// CHECK-NEXT: [[COERCE45_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE45_TUPLE]], 0 +// CHECK-NEXT: [[COERCE45_EXTRACT1:%.*]] = extractvalue { , , } 
[[COERCE45_TUPLE]], 1 +// CHECK-NEXT: [[COERCE45_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE45_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f12svuint64x3_tS_( [[COERCE44_EXTRACT0]], [[COERCE44_EXTRACT1]], [[COERCE44_EXTRACT2]], [[COERCE45_EXTRACT0]], [[COERCE45_EXTRACT1]], [[COERCE45_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE46]], align 16 +// CHECK-NEXT: [[COERCE46_TUPLE:%.*]] = load { , , , }, ptr [[COERCE46]], align 16 +// CHECK-NEXT: [[COERCE46_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 0 +// CHECK-NEXT: [[COERCE46_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 1 +// CHECK-NEXT: [[COERCE46_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 2 +// CHECK-NEXT: [[COERCE46_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE47]], align 16 +// CHECK-NEXT: [[COERCE47_TUPLE:%.*]] = load { , , , }, ptr [[COERCE47]], align 16 +// CHECK-NEXT: [[COERCE47_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 0 +// CHECK-NEXT: [[COERCE47_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 1 +// CHECK-NEXT: [[COERCE47_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 2 +// CHECK-NEXT: [[COERCE47_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f12svuint64x4_tS_( [[COERCE46_EXTRACT0]], [[COERCE46_EXTRACT1]], [[COERCE46_EXTRACT2]], [[COERCE46_EXTRACT3]], [[COERCE47_EXTRACT0]], [[COERCE47_EXTRACT1]], [[COERCE47_EXTRACT2]], [[COERCE47_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE48]], align 16 +// CHECK-NEXT: [[COERCE48_TUPLE:%.*]] = load { , }, ptr [[COERCE48]], align 16 +// CHECK-NEXT: [[COERCE48_EXTRACT0:%.*]] = extractvalue { , } [[COERCE48_TUPLE]], 0 +// CHECK-NEXT: [[COERCE48_EXTRACT1:%.*]] = extractvalue { , } [[COERCE48_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE49]], align 16 +// CHECK-NEXT: [[COERCE49_TUPLE:%.*]] = load { , }, ptr [[COERCE49]], align 16 +// CHECK-NEXT: [[COERCE49_EXTRACT0:%.*]] = extractvalue { , } [[COERCE49_TUPLE]], 0 +// CHECK-NEXT: [[COERCE49_EXTRACT1:%.*]] = extractvalue { , } [[COERCE49_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f13svfloat16x2_tS_( [[COERCE48_EXTRACT0]], [[COERCE48_EXTRACT1]], [[COERCE49_EXTRACT0]], [[COERCE49_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE50]], align 16 +// CHECK-NEXT: [[COERCE50_TUPLE:%.*]] = load { , , }, ptr [[COERCE50]], align 16 +// CHECK-NEXT: [[COERCE50_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 0 +// CHECK-NEXT: [[COERCE50_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 1 +// CHECK-NEXT: [[COERCE50_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE51]], align 16 +// CHECK-NEXT: [[COERCE51_TUPLE:%.*]] = load { , , }, ptr [[COERCE51]], align 16 +// CHECK-NEXT: [[COERCE51_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 0 +// CHECK-NEXT: [[COERCE51_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 1 +// CHECK-NEXT: [[COERCE51_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f13svfloat16x3_tS_( [[COERCE50_EXTRACT0]], [[COERCE50_EXTRACT1]], [[COERCE50_EXTRACT2]], [[COERCE51_EXTRACT0]], [[COERCE51_EXTRACT1]], [[COERCE51_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE52]], align 16 +// CHECK-NEXT: [[COERCE52_TUPLE:%.*]] = load { , , , }, ptr [[COERCE52]], align 16 +// CHECK-NEXT: 
[[COERCE52_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 0 +// CHECK-NEXT: [[COERCE52_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 1 +// CHECK-NEXT: [[COERCE52_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 2 +// CHECK-NEXT: [[COERCE52_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE53]], align 16 +// CHECK-NEXT: [[COERCE53_TUPLE:%.*]] = load { , , , }, ptr [[COERCE53]], align 16 +// CHECK-NEXT: [[COERCE53_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 0 +// CHECK-NEXT: [[COERCE53_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 1 +// CHECK-NEXT: [[COERCE53_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 2 +// CHECK-NEXT: [[COERCE53_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f13svfloat16x4_tS_( [[COERCE52_EXTRACT0]], [[COERCE52_EXTRACT1]], [[COERCE52_EXTRACT2]], [[COERCE52_EXTRACT3]], [[COERCE53_EXTRACT0]], [[COERCE53_EXTRACT1]], [[COERCE53_EXTRACT2]], [[COERCE53_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE54]], align 16 +// CHECK-NEXT: [[COERCE54_TUPLE:%.*]] = load { , }, ptr [[COERCE54]], align 16 +// CHECK-NEXT: [[COERCE54_EXTRACT0:%.*]] = extractvalue { , } [[COERCE54_TUPLE]], 0 +// CHECK-NEXT: [[COERCE54_EXTRACT1:%.*]] = extractvalue { , } [[COERCE54_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE55]], align 16 +// CHECK-NEXT: [[COERCE55_TUPLE:%.*]] = load { , }, ptr [[COERCE55]], align 16 +// CHECK-NEXT: [[COERCE55_EXTRACT0:%.*]] = extractvalue { , } [[COERCE55_TUPLE]], 0 +// CHECK-NEXT: [[COERCE55_EXTRACT1:%.*]] = extractvalue { , } [[COERCE55_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f13svfloat32x2_tS_( [[COERCE54_EXTRACT0]], [[COERCE54_EXTRACT1]], [[COERCE55_EXTRACT0]], [[COERCE55_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE56]], align 16 +// CHECK-NEXT: [[COERCE56_TUPLE:%.*]] = load { , , }, ptr [[COERCE56]], align 16 +// CHECK-NEXT: [[COERCE56_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 0 +// CHECK-NEXT: [[COERCE56_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 1 +// CHECK-NEXT: [[COERCE56_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE57]], align 16 +// CHECK-NEXT: [[COERCE57_TUPLE:%.*]] = load { , , }, ptr [[COERCE57]], align 16 +// CHECK-NEXT: [[COERCE57_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 0 +// CHECK-NEXT: [[COERCE57_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 1 +// CHECK-NEXT: [[COERCE57_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f13svfloat32x3_tS_( [[COERCE56_EXTRACT0]], [[COERCE56_EXTRACT1]], [[COERCE56_EXTRACT2]], [[COERCE57_EXTRACT0]], [[COERCE57_EXTRACT1]], [[COERCE57_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE58]], align 16 +// CHECK-NEXT: [[COERCE58_TUPLE:%.*]] = load { , , , }, ptr [[COERCE58]], align 16 +// CHECK-NEXT: [[COERCE58_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 0 +// CHECK-NEXT: [[COERCE58_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 1 +// CHECK-NEXT: [[COERCE58_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 2 +// CHECK-NEXT: [[COERCE58_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE59]], align 16 +// CHECK-NEXT: [[COERCE59_TUPLE:%.*]] = load { , , , }, ptr 
[[COERCE59]], align 16 +// CHECK-NEXT: [[COERCE59_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 0 +// CHECK-NEXT: [[COERCE59_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 1 +// CHECK-NEXT: [[COERCE59_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 2 +// CHECK-NEXT: [[COERCE59_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f13svfloat32x4_tS_( [[COERCE58_EXTRACT0]], [[COERCE58_EXTRACT1]], [[COERCE58_EXTRACT2]], [[COERCE58_EXTRACT3]], [[COERCE59_EXTRACT0]], [[COERCE59_EXTRACT1]], [[COERCE59_EXTRACT2]], [[COERCE59_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE60]], align 16 +// CHECK-NEXT: [[COERCE60_TUPLE:%.*]] = load { , }, ptr [[COERCE60]], align 16 +// CHECK-NEXT: [[COERCE60_EXTRACT0:%.*]] = extractvalue { , } [[COERCE60_TUPLE]], 0 +// CHECK-NEXT: [[COERCE60_EXTRACT1:%.*]] = extractvalue { , } [[COERCE60_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE61]], align 16 +// CHECK-NEXT: [[COERCE61_TUPLE:%.*]] = load { , }, ptr [[COERCE61]], align 16 +// CHECK-NEXT: [[COERCE61_EXTRACT0:%.*]] = extractvalue { , } [[COERCE61_TUPLE]], 0 +// CHECK-NEXT: [[COERCE61_EXTRACT1:%.*]] = extractvalue { , } [[COERCE61_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f13svfloat64x2_tS_( [[COERCE60_EXTRACT0]], [[COERCE60_EXTRACT1]], [[COERCE61_EXTRACT0]], [[COERCE61_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE62]], align 16 +// CHECK-NEXT: [[COERCE62_TUPLE:%.*]] = load { , , }, ptr [[COERCE62]], align 16 +// CHECK-NEXT: [[COERCE62_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 0 +// CHECK-NEXT: [[COERCE62_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 1 +// CHECK-NEXT: [[COERCE62_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE63]], align 16 +// CHECK-NEXT: [[COERCE63_TUPLE:%.*]] = load { , , }, ptr [[COERCE63]], align 16 +// CHECK-NEXT: [[COERCE63_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 0 +// CHECK-NEXT: [[COERCE63_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 1 +// CHECK-NEXT: [[COERCE63_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f13svfloat64x3_tS_( [[COERCE62_EXTRACT0]], [[COERCE62_EXTRACT1]], [[COERCE62_EXTRACT2]], [[COERCE63_EXTRACT0]], [[COERCE63_EXTRACT1]], [[COERCE63_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE64]], align 16 +// CHECK-NEXT: [[COERCE64_TUPLE:%.*]] = load { , , , }, ptr [[COERCE64]], align 16 +// CHECK-NEXT: [[COERCE64_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 0 +// CHECK-NEXT: [[COERCE64_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 1 +// CHECK-NEXT: [[COERCE64_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 2 +// CHECK-NEXT: [[COERCE64_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE65]], align 16 +// CHECK-NEXT: [[COERCE65_TUPLE:%.*]] = load { , , , }, ptr [[COERCE65]], align 16 +// CHECK-NEXT: [[COERCE65_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 0 +// CHECK-NEXT: [[COERCE65_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 1 +// CHECK-NEXT: [[COERCE65_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 2 +// CHECK-NEXT: [[COERCE65_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f13svfloat64x4_tS_( [[COERCE64_EXTRACT0]], [[COERCE64_EXTRACT1]], 
[[COERCE64_EXTRACT2]], [[COERCE64_EXTRACT3]], [[COERCE65_EXTRACT0]], [[COERCE65_EXTRACT1]], [[COERCE65_EXTRACT2]], [[COERCE65_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE66]], align 16 +// CHECK-NEXT: [[COERCE66_TUPLE:%.*]] = load { , }, ptr [[COERCE66]], align 16 +// CHECK-NEXT: [[COERCE66_EXTRACT0:%.*]] = extractvalue { , } [[COERCE66_TUPLE]], 0 +// CHECK-NEXT: [[COERCE66_EXTRACT1:%.*]] = extractvalue { , } [[COERCE66_TUPLE]], 1 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE67]], align 16 +// CHECK-NEXT: [[COERCE67_TUPLE:%.*]] = load { , }, ptr [[COERCE67]], align 16 +// CHECK-NEXT: [[COERCE67_EXTRACT0:%.*]] = extractvalue { , } [[COERCE67_TUPLE]], 0 +// CHECK-NEXT: [[COERCE67_EXTRACT1:%.*]] = extractvalue { , } [[COERCE67_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f14svbfloat16x2_tS_( [[COERCE66_EXTRACT0]], [[COERCE66_EXTRACT1]], [[COERCE67_EXTRACT0]], [[COERCE67_EXTRACT1]]) +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE68]], align 16 +// CHECK-NEXT: [[COERCE68_TUPLE:%.*]] = load { , , }, ptr [[COERCE68]], align 16 +// CHECK-NEXT: [[COERCE68_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 0 +// CHECK-NEXT: [[COERCE68_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 1 +// CHECK-NEXT: [[COERCE68_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 2 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[COERCE69]], align 16 +// CHECK-NEXT: [[COERCE69_TUPLE:%.*]] = load { , , }, ptr [[COERCE69]], align 16 +// CHECK-NEXT: [[COERCE69_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 0 +// CHECK-NEXT: [[COERCE69_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 1 +// CHECK-NEXT: [[COERCE69_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 2 +// CHECK-NEXT: call void @_Z1f14svbfloat16x3_tS_( [[COERCE68_EXTRACT0]], [[COERCE68_EXTRACT1]], [[COERCE68_EXTRACT2]], [[COERCE69_EXTRACT0]], [[COERCE69_EXTRACT1]], [[COERCE69_EXTRACT2]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE70]], align 16 +// CHECK-NEXT: [[COERCE70_TUPLE:%.*]] = load { , , , }, ptr [[COERCE70]], align 16 +// CHECK-NEXT: [[COERCE70_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 0 +// CHECK-NEXT: [[COERCE70_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 1 +// CHECK-NEXT: [[COERCE70_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 2 +// CHECK-NEXT: [[COERCE70_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE71]], align 16 +// CHECK-NEXT: [[COERCE71_TUPLE:%.*]] = load { , , , }, ptr [[COERCE71]], align 16 +// CHECK-NEXT: [[COERCE71_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 0 +// CHECK-NEXT: [[COERCE71_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 1 +// CHECK-NEXT: [[COERCE71_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 2 +// CHECK-NEXT: [[COERCE71_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f14svbfloat16x4_tS_( [[COERCE70_EXTRACT0]], [[COERCE70_EXTRACT1]], [[COERCE70_EXTRACT2]], [[COERCE70_EXTRACT3]], [[COERCE71_EXTRACT0]], [[COERCE71_EXTRACT1]], [[COERCE71_EXTRACT2]], [[COERCE71_EXTRACT3]]) +// CHECK-NEXT: store { , } zeroinitializer, ptr [[COERCE72]], align 2 +// CHECK-NEXT: [[COERCE72_TUPLE:%.*]] = load { , }, ptr [[COERCE72]], align 2 +// CHECK-NEXT: [[COERCE72_EXTRACT0:%.*]] = extractvalue { , } [[COERCE72_TUPLE]], 0 +// CHECK-NEXT: [[COERCE72_EXTRACT1:%.*]] = extractvalue { , } [[COERCE72_TUPLE]], 1 +// CHECK-NEXT: 
store { , } zeroinitializer, ptr [[COERCE73]], align 2 +// CHECK-NEXT: [[COERCE73_TUPLE:%.*]] = load { , }, ptr [[COERCE73]], align 2 +// CHECK-NEXT: [[COERCE73_EXTRACT0:%.*]] = extractvalue { , } [[COERCE73_TUPLE]], 0 +// CHECK-NEXT: [[COERCE73_EXTRACT1:%.*]] = extractvalue { , } [[COERCE73_TUPLE]], 1 +// CHECK-NEXT: call void @_Z1f10svboolx2_tS_( [[COERCE72_EXTRACT0]], [[COERCE72_EXTRACT1]], [[COERCE73_EXTRACT0]], [[COERCE73_EXTRACT1]]) +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE74]], align 2 +// CHECK-NEXT: [[COERCE74_TUPLE:%.*]] = load { , , , }, ptr [[COERCE74]], align 2 +// CHECK-NEXT: [[COERCE74_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 0 +// CHECK-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 1 +// CHECK-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 2 +// CHECK-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 3 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[COERCE75]], align 2 +// CHECK-NEXT: [[COERCE75_TUPLE:%.*]] = load { , , , }, ptr [[COERCE75]], align 2 +// CHECK-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 0 +// CHECK-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 1 +// CHECK-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 2 +// CHECK-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 3 +// CHECK-NEXT: call void @_Z1f10svboolx4_tS_( [[COERCE74_EXTRACT0]], [[COERCE74_EXTRACT1]], [[COERCE74_EXTRACT2]], [[COERCE74_EXTRACT3]], [[COERCE75_EXTRACT0]], [[COERCE75_EXTRACT1]], [[COERCE75_EXTRACT2]], [[COERCE75_EXTRACT3]]) // CHECK-NEXT: ret void // // COMPAT_17-LABEL: define dso_local void @_Z3foov( // COMPAT_17-SAME: ) #[[ATTR0:[0-9]+]] { // COMPAT_17-NEXT: entry: +// COMPAT_17-NEXT: [[COERCE:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE1:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE2:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE3:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE4:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE5:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE6:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE7:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE8:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE9:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE10:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE11:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE12:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE13:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE14:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE15:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE16:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE17:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE18:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE19:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE20:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE21:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE22:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE23:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE24:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE25:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE26:%.*]] = alloca { , , }, 
align 16 +// COMPAT_17-NEXT: [[COERCE27:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE28:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE29:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE30:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE31:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE32:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE33:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE34:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE35:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE36:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE37:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE38:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE39:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE40:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE41:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE42:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE43:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE44:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE45:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE46:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE47:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE48:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE49:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE50:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE51:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE52:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE53:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE54:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE55:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE56:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE57:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE58:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE59:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE60:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE61:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE62:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE63:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE64:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE65:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE66:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE67:%.*]] = alloca { , }, align 16 +// COMPAT_17-NEXT: [[COERCE68:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE69:%.*]] = alloca { , , }, align 16 +// COMPAT_17-NEXT: [[COERCE70:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE71:%.*]] = alloca { , , , }, align 16 +// COMPAT_17-NEXT: [[COERCE72:%.*]] = alloca { , }, align 2 +// COMPAT_17-NEXT: [[COERCE73:%.*]] = alloca { , }, align 2 +// COMPAT_17-NEXT: [[COERCE74:%.*]] = alloca { , , , }, align 2 +// COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { , , , }, align 2 // COMPAT_17-NEXT: call void @_Z1fu10__SVInt8_tu10__SVInt8_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t( zeroinitializer, zeroinitializer) @@ -135,44 +667,424 @@ void f(__clang_svboolx4_t, __clang_svboolx4_t); // COMPAT_17-NEXT: call void 
@_Z1fu14__SVBFloat16_tu14__SVBFloat16_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu10__SVBool_tu10__SVBool_t( zeroinitializer, zeroinitializer) // COMPAT_17-NEXT: call void @_Z1fu11__SVCount_tu11__SVCount_t(target("aarch64.svcount") zeroinitializer, target("aarch64.svcount") zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f10svint8x2_t10svint8x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f10svint8x3_t10svint8x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f10svint8x4_t10svint8x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint16x2_t11svint16x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint16x3_t11svint16x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint16x4_t11svint16x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint32x2_t11svint32x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint32x3_t11svint32x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint32x4_t11svint32x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint64x2_t11svint64x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint64x3_t11svint64x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svint64x4_t11svint64x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svuint8x2_t11svuint8x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svuint8x3_t11svuint8x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f11svuint8x4_t11svuint8x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint16x2_t12svuint16x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint16x3_t12svuint16x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint16x4_t12svuint16x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint32x2_t12svuint32x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint32x3_t12svuint32x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint32x4_t12svuint32x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint64x2_t12svuint64x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint64x3_t12svuint64x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f12svuint64x4_t12svuint64x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat16x2_t13svfloat16x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat16x3_t13svfloat16x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat16x4_t13svfloat16x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat32x2_t13svfloat32x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat32x3_t13svfloat32x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat32x4_t13svfloat32x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat64x2_t13svfloat64x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat64x3_t13svfloat64x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f13svfloat64x4_t13svfloat64x4_t( zeroinitializer, 
zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f14svbfloat16x2_t14svbfloat16x2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f14svbfloat16x3_t14svbfloat16x3_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f14svbfloat16x4_t14svbfloat16x4_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f10svboolx2_t10svboolx2_t( zeroinitializer, zeroinitializer) -// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t( zeroinitializer, zeroinitializer) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE]], align 16 +// COMPAT_17-NEXT: [[COERCE_TUPLE:%.*]] = load { , }, ptr [[COERCE]], align 16 +// COMPAT_17-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { , } [[COERCE_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { , } [[COERCE_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE1]], align 16 +// COMPAT_17-NEXT: [[COERCE1_TUPLE:%.*]] = load { , }, ptr [[COERCE1]], align 16 +// COMPAT_17-NEXT: [[COERCE1_EXTRACT0:%.*]] = extractvalue { , } [[COERCE1_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE1_EXTRACT1:%.*]] = extractvalue { , } [[COERCE1_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f10svint8x2_t10svint8x2_t( [[COERCE_EXTRACT0]], [[COERCE_EXTRACT1]], [[COERCE1_EXTRACT0]], [[COERCE1_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE2]], align 16 +// COMPAT_17-NEXT: [[COERCE2_TUPLE:%.*]] = load { , , }, ptr [[COERCE2]], align 16 +// COMPAT_17-NEXT: [[COERCE2_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE2_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE2_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE2_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE2_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE2_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE3]], align 16 +// COMPAT_17-NEXT: [[COERCE3_TUPLE:%.*]] = load { , , }, ptr [[COERCE3]], align 16 +// COMPAT_17-NEXT: [[COERCE3_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE3_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE3_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE3_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE3_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE3_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f10svint8x3_t10svint8x3_t( [[COERCE2_EXTRACT0]], [[COERCE2_EXTRACT1]], [[COERCE2_EXTRACT2]], [[COERCE3_EXTRACT0]], [[COERCE3_EXTRACT1]], [[COERCE3_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE4]], align 16 +// COMPAT_17-NEXT: [[COERCE4_TUPLE:%.*]] = load { , , , }, ptr [[COERCE4]], align 16 +// COMPAT_17-NEXT: [[COERCE4_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE4_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE4_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE4_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE4_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE4_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE4_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE4_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE5]], align 16 +// COMPAT_17-NEXT: [[COERCE5_TUPLE:%.*]] = load { , , , }, ptr [[COERCE5]], align 16 +// COMPAT_17-NEXT: [[COERCE5_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE5_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE5_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE5_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE5_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE5_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE5_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE5_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f10svint8x4_t10svint8x4_t( [[COERCE4_EXTRACT0]], [[COERCE4_EXTRACT1]], [[COERCE4_EXTRACT2]], 
[[COERCE4_EXTRACT3]], [[COERCE5_EXTRACT0]], [[COERCE5_EXTRACT1]], [[COERCE5_EXTRACT2]], [[COERCE5_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE6]], align 16 +// COMPAT_17-NEXT: [[COERCE6_TUPLE:%.*]] = load { , }, ptr [[COERCE6]], align 16 +// COMPAT_17-NEXT: [[COERCE6_EXTRACT0:%.*]] = extractvalue { , } [[COERCE6_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE6_EXTRACT1:%.*]] = extractvalue { , } [[COERCE6_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE7]], align 16 +// COMPAT_17-NEXT: [[COERCE7_TUPLE:%.*]] = load { , }, ptr [[COERCE7]], align 16 +// COMPAT_17-NEXT: [[COERCE7_EXTRACT0:%.*]] = extractvalue { , } [[COERCE7_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE7_EXTRACT1:%.*]] = extractvalue { , } [[COERCE7_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f11svint16x2_t11svint16x2_t( [[COERCE6_EXTRACT0]], [[COERCE6_EXTRACT1]], [[COERCE7_EXTRACT0]], [[COERCE7_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE8]], align 16 +// COMPAT_17-NEXT: [[COERCE8_TUPLE:%.*]] = load { , , }, ptr [[COERCE8]], align 16 +// COMPAT_17-NEXT: [[COERCE8_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE8_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE8_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE8_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE8_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE8_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE9]], align 16 +// COMPAT_17-NEXT: [[COERCE9_TUPLE:%.*]] = load { , , }, ptr [[COERCE9]], align 16 +// COMPAT_17-NEXT: [[COERCE9_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE9_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE9_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE9_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE9_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE9_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f11svint16x3_t11svint16x3_t( [[COERCE8_EXTRACT0]], [[COERCE8_EXTRACT1]], [[COERCE8_EXTRACT2]], [[COERCE9_EXTRACT0]], [[COERCE9_EXTRACT1]], [[COERCE9_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE10]], align 16 +// COMPAT_17-NEXT: [[COERCE10_TUPLE:%.*]] = load { , , , }, ptr [[COERCE10]], align 16 +// COMPAT_17-NEXT: [[COERCE10_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE10_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE10_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE10_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE10_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE10_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE10_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE10_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE11]], align 16 +// COMPAT_17-NEXT: [[COERCE11_TUPLE:%.*]] = load { , , , }, ptr [[COERCE11]], align 16 +// COMPAT_17-NEXT: [[COERCE11_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE11_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE11_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE11_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE11_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f11svint16x4_t11svint16x4_t( [[COERCE10_EXTRACT0]], [[COERCE10_EXTRACT1]], [[COERCE10_EXTRACT2]], [[COERCE10_EXTRACT3]], [[COERCE11_EXTRACT0]], [[COERCE11_EXTRACT1]], [[COERCE11_EXTRACT2]], [[COERCE11_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE12]], align 16 +// COMPAT_17-NEXT: [[COERCE12_TUPLE:%.*]] = load { , }, ptr [[COERCE12]], align 16 +// COMPAT_17-NEXT: [[COERCE12_EXTRACT0:%.*]] = extractvalue { , } [[COERCE12_TUPLE]], 0 +// 
COMPAT_17-NEXT: [[COERCE12_EXTRACT1:%.*]] = extractvalue { , } [[COERCE12_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE13]], align 16 +// COMPAT_17-NEXT: [[COERCE13_TUPLE:%.*]] = load { , }, ptr [[COERCE13]], align 16 +// COMPAT_17-NEXT: [[COERCE13_EXTRACT0:%.*]] = extractvalue { , } [[COERCE13_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE13_EXTRACT1:%.*]] = extractvalue { , } [[COERCE13_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f11svint32x2_t11svint32x2_t( [[COERCE12_EXTRACT0]], [[COERCE12_EXTRACT1]], [[COERCE13_EXTRACT0]], [[COERCE13_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE14]], align 16 +// COMPAT_17-NEXT: [[COERCE14_TUPLE:%.*]] = load { , , }, ptr [[COERCE14]], align 16 +// COMPAT_17-NEXT: [[COERCE14_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE14_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE14_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE14_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE15]], align 16 +// COMPAT_17-NEXT: [[COERCE15_TUPLE:%.*]] = load { , , }, ptr [[COERCE15]], align 16 +// COMPAT_17-NEXT: [[COERCE15_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE15_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE15_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE15_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f11svint32x3_t11svint32x3_t( [[COERCE14_EXTRACT0]], [[COERCE14_EXTRACT1]], [[COERCE14_EXTRACT2]], [[COERCE15_EXTRACT0]], [[COERCE15_EXTRACT1]], [[COERCE15_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE16]], align 16 +// COMPAT_17-NEXT: [[COERCE16_TUPLE:%.*]] = load { , , , }, ptr [[COERCE16]], align 16 +// COMPAT_17-NEXT: [[COERCE16_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE16_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE16_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE16_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE16_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE17]], align 16 +// COMPAT_17-NEXT: [[COERCE17_TUPLE:%.*]] = load { , , , }, ptr [[COERCE17]], align 16 +// COMPAT_17-NEXT: [[COERCE17_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE17_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE17_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE17_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE17_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f11svint32x4_t11svint32x4_t( [[COERCE16_EXTRACT0]], [[COERCE16_EXTRACT1]], [[COERCE16_EXTRACT2]], [[COERCE16_EXTRACT3]], [[COERCE17_EXTRACT0]], [[COERCE17_EXTRACT1]], [[COERCE17_EXTRACT2]], [[COERCE17_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE18]], align 16 +// COMPAT_17-NEXT: [[COERCE18_TUPLE:%.*]] = load { , }, ptr [[COERCE18]], align 16 +// COMPAT_17-NEXT: [[COERCE18_EXTRACT0:%.*]] = extractvalue { , } [[COERCE18_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE18_EXTRACT1:%.*]] = extractvalue { , } [[COERCE18_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE19]], align 16 +// COMPAT_17-NEXT: [[COERCE19_TUPLE:%.*]] = load { , }, ptr [[COERCE19]], align 16 +// COMPAT_17-NEXT: [[COERCE19_EXTRACT0:%.*]] = extractvalue { , } 
[[COERCE19_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE19_EXTRACT1:%.*]] = extractvalue { , } [[COERCE19_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f11svint64x2_t11svint64x2_t( [[COERCE18_EXTRACT0]], [[COERCE18_EXTRACT1]], [[COERCE19_EXTRACT0]], [[COERCE19_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE20]], align 16 +// COMPAT_17-NEXT: [[COERCE20_TUPLE:%.*]] = load { , , }, ptr [[COERCE20]], align 16 +// COMPAT_17-NEXT: [[COERCE20_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE20_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE20_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE20_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE21]], align 16 +// COMPAT_17-NEXT: [[COERCE21_TUPLE:%.*]] = load { , , }, ptr [[COERCE21]], align 16 +// COMPAT_17-NEXT: [[COERCE21_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE21_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE21_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE21_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f11svint64x3_t11svint64x3_t( [[COERCE20_EXTRACT0]], [[COERCE20_EXTRACT1]], [[COERCE20_EXTRACT2]], [[COERCE21_EXTRACT0]], [[COERCE21_EXTRACT1]], [[COERCE21_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE22]], align 16 +// COMPAT_17-NEXT: [[COERCE22_TUPLE:%.*]] = load { , , , }, ptr [[COERCE22]], align 16 +// COMPAT_17-NEXT: [[COERCE22_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE22_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE22_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE22_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE22_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE23]], align 16 +// COMPAT_17-NEXT: [[COERCE23_TUPLE:%.*]] = load { , , , }, ptr [[COERCE23]], align 16 +// COMPAT_17-NEXT: [[COERCE23_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE23_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE23_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE23_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE23_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f11svint64x4_t11svint64x4_t( [[COERCE22_EXTRACT0]], [[COERCE22_EXTRACT1]], [[COERCE22_EXTRACT2]], [[COERCE22_EXTRACT3]], [[COERCE23_EXTRACT0]], [[COERCE23_EXTRACT1]], [[COERCE23_EXTRACT2]], [[COERCE23_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE24]], align 16 +// COMPAT_17-NEXT: [[COERCE24_TUPLE:%.*]] = load { , }, ptr [[COERCE24]], align 16 +// COMPAT_17-NEXT: [[COERCE24_EXTRACT0:%.*]] = extractvalue { , } [[COERCE24_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE24_EXTRACT1:%.*]] = extractvalue { , } [[COERCE24_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE25]], align 16 +// COMPAT_17-NEXT: [[COERCE25_TUPLE:%.*]] = load { , }, ptr [[COERCE25]], align 16 +// COMPAT_17-NEXT: [[COERCE25_EXTRACT0:%.*]] = extractvalue { , } [[COERCE25_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE25_EXTRACT1:%.*]] = extractvalue { , } [[COERCE25_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f11svuint8x2_t11svuint8x2_t( [[COERCE24_EXTRACT0]], [[COERCE24_EXTRACT1]], [[COERCE25_EXTRACT0]], [[COERCE25_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr 
[[COERCE26]], align 16 +// COMPAT_17-NEXT: [[COERCE26_TUPLE:%.*]] = load { , , }, ptr [[COERCE26]], align 16 +// COMPAT_17-NEXT: [[COERCE26_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE26_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE26_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE26_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE27]], align 16 +// COMPAT_17-NEXT: [[COERCE27_TUPLE:%.*]] = load { , , }, ptr [[COERCE27]], align 16 +// COMPAT_17-NEXT: [[COERCE27_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE27_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE27_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE27_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f11svuint8x3_t11svuint8x3_t( [[COERCE26_EXTRACT0]], [[COERCE26_EXTRACT1]], [[COERCE26_EXTRACT2]], [[COERCE27_EXTRACT0]], [[COERCE27_EXTRACT1]], [[COERCE27_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE28]], align 16 +// COMPAT_17-NEXT: [[COERCE28_TUPLE:%.*]] = load { , , , }, ptr [[COERCE28]], align 16 +// COMPAT_17-NEXT: [[COERCE28_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE28_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE28_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE28_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE28_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE29]], align 16 +// COMPAT_17-NEXT: [[COERCE29_TUPLE:%.*]] = load { , , , }, ptr [[COERCE29]], align 16 +// COMPAT_17-NEXT: [[COERCE29_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE29_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE29_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE29_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE29_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f11svuint8x4_t11svuint8x4_t( [[COERCE28_EXTRACT0]], [[COERCE28_EXTRACT1]], [[COERCE28_EXTRACT2]], [[COERCE28_EXTRACT3]], [[COERCE29_EXTRACT0]], [[COERCE29_EXTRACT1]], [[COERCE29_EXTRACT2]], [[COERCE29_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE30]], align 16 +// COMPAT_17-NEXT: [[COERCE30_TUPLE:%.*]] = load { , }, ptr [[COERCE30]], align 16 +// COMPAT_17-NEXT: [[COERCE30_EXTRACT0:%.*]] = extractvalue { , } [[COERCE30_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE30_EXTRACT1:%.*]] = extractvalue { , } [[COERCE30_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE31]], align 16 +// COMPAT_17-NEXT: [[COERCE31_TUPLE:%.*]] = load { , }, ptr [[COERCE31]], align 16 +// COMPAT_17-NEXT: [[COERCE31_EXTRACT0:%.*]] = extractvalue { , } [[COERCE31_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE31_EXTRACT1:%.*]] = extractvalue { , } [[COERCE31_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f12svuint16x2_t12svuint16x2_t( [[COERCE30_EXTRACT0]], [[COERCE30_EXTRACT1]], [[COERCE31_EXTRACT0]], [[COERCE31_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE32]], align 16 +// COMPAT_17-NEXT: [[COERCE32_TUPLE:%.*]] = load { , , }, ptr [[COERCE32]], align 16 +// COMPAT_17-NEXT: [[COERCE32_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE32_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 1 +// COMPAT_17-NEXT: 
[[COERCE32_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE32_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE33]], align 16 +// COMPAT_17-NEXT: [[COERCE33_TUPLE:%.*]] = load { , , }, ptr [[COERCE33]], align 16 +// COMPAT_17-NEXT: [[COERCE33_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE33_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE33_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE33_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f12svuint16x3_t12svuint16x3_t( [[COERCE32_EXTRACT0]], [[COERCE32_EXTRACT1]], [[COERCE32_EXTRACT2]], [[COERCE33_EXTRACT0]], [[COERCE33_EXTRACT1]], [[COERCE33_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE34]], align 16 +// COMPAT_17-NEXT: [[COERCE34_TUPLE:%.*]] = load { , , , }, ptr [[COERCE34]], align 16 +// COMPAT_17-NEXT: [[COERCE34_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE34_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE34_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE34_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE34_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE35]], align 16 +// COMPAT_17-NEXT: [[COERCE35_TUPLE:%.*]] = load { , , , }, ptr [[COERCE35]], align 16 +// COMPAT_17-NEXT: [[COERCE35_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE35_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE35_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE35_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE35_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f12svuint16x4_t12svuint16x4_t( [[COERCE34_EXTRACT0]], [[COERCE34_EXTRACT1]], [[COERCE34_EXTRACT2]], [[COERCE34_EXTRACT3]], [[COERCE35_EXTRACT0]], [[COERCE35_EXTRACT1]], [[COERCE35_EXTRACT2]], [[COERCE35_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE36]], align 16 +// COMPAT_17-NEXT: [[COERCE36_TUPLE:%.*]] = load { , }, ptr [[COERCE36]], align 16 +// COMPAT_17-NEXT: [[COERCE36_EXTRACT0:%.*]] = extractvalue { , } [[COERCE36_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE36_EXTRACT1:%.*]] = extractvalue { , } [[COERCE36_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE37]], align 16 +// COMPAT_17-NEXT: [[COERCE37_TUPLE:%.*]] = load { , }, ptr [[COERCE37]], align 16 +// COMPAT_17-NEXT: [[COERCE37_EXTRACT0:%.*]] = extractvalue { , } [[COERCE37_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE37_EXTRACT1:%.*]] = extractvalue { , } [[COERCE37_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f12svuint32x2_t12svuint32x2_t( [[COERCE36_EXTRACT0]], [[COERCE36_EXTRACT1]], [[COERCE37_EXTRACT0]], [[COERCE37_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE38]], align 16 +// COMPAT_17-NEXT: [[COERCE38_TUPLE:%.*]] = load { , , }, ptr [[COERCE38]], align 16 +// COMPAT_17-NEXT: [[COERCE38_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE38_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE38_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE38_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE39]], align 16 +// COMPAT_17-NEXT: [[COERCE39_TUPLE:%.*]] = load { , , }, ptr [[COERCE39]], align 16 +// COMPAT_17-NEXT: [[COERCE39_EXTRACT0:%.*]] = extractvalue { , , } 
[[COERCE39_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE39_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE39_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE39_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE39_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f12svuint32x3_t12svuint32x3_t( [[COERCE38_EXTRACT0]], [[COERCE38_EXTRACT1]], [[COERCE38_EXTRACT2]], [[COERCE39_EXTRACT0]], [[COERCE39_EXTRACT1]], [[COERCE39_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE40]], align 16 +// COMPAT_17-NEXT: [[COERCE40_TUPLE:%.*]] = load { , , , }, ptr [[COERCE40]], align 16 +// COMPAT_17-NEXT: [[COERCE40_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE40_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE40_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE40_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE40_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE41]], align 16 +// COMPAT_17-NEXT: [[COERCE41_TUPLE:%.*]] = load { , , , }, ptr [[COERCE41]], align 16 +// COMPAT_17-NEXT: [[COERCE41_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE41_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE41_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE41_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE41_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f12svuint32x4_t12svuint32x4_t( [[COERCE40_EXTRACT0]], [[COERCE40_EXTRACT1]], [[COERCE40_EXTRACT2]], [[COERCE40_EXTRACT3]], [[COERCE41_EXTRACT0]], [[COERCE41_EXTRACT1]], [[COERCE41_EXTRACT2]], [[COERCE41_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE42]], align 16 +// COMPAT_17-NEXT: [[COERCE42_TUPLE:%.*]] = load { , }, ptr [[COERCE42]], align 16 +// COMPAT_17-NEXT: [[COERCE42_EXTRACT0:%.*]] = extractvalue { , } [[COERCE42_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE42_EXTRACT1:%.*]] = extractvalue { , } [[COERCE42_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE43]], align 16 +// COMPAT_17-NEXT: [[COERCE43_TUPLE:%.*]] = load { , }, ptr [[COERCE43]], align 16 +// COMPAT_17-NEXT: [[COERCE43_EXTRACT0:%.*]] = extractvalue { , } [[COERCE43_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE43_EXTRACT1:%.*]] = extractvalue { , } [[COERCE43_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f12svuint64x2_t12svuint64x2_t( [[COERCE42_EXTRACT0]], [[COERCE42_EXTRACT1]], [[COERCE43_EXTRACT0]], [[COERCE43_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE44]], align 16 +// COMPAT_17-NEXT: [[COERCE44_TUPLE:%.*]] = load { , , }, ptr [[COERCE44]], align 16 +// COMPAT_17-NEXT: [[COERCE44_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE44_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE44_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE44_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE45]], align 16 +// COMPAT_17-NEXT: [[COERCE45_TUPLE:%.*]] = load { , , }, ptr [[COERCE45]], align 16 +// COMPAT_17-NEXT: [[COERCE45_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE45_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE45_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE45_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE45_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE45_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f12svuint64x3_t12svuint64x3_t( [[COERCE44_EXTRACT0]], [[COERCE44_EXTRACT1]], 
[[COERCE44_EXTRACT2]], [[COERCE45_EXTRACT0]], [[COERCE45_EXTRACT1]], [[COERCE45_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE46]], align 16 +// COMPAT_17-NEXT: [[COERCE46_TUPLE:%.*]] = load { , , , }, ptr [[COERCE46]], align 16 +// COMPAT_17-NEXT: [[COERCE46_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE46_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE46_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE46_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE46_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE47]], align 16 +// COMPAT_17-NEXT: [[COERCE47_TUPLE:%.*]] = load { , , , }, ptr [[COERCE47]], align 16 +// COMPAT_17-NEXT: [[COERCE47_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE47_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE47_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE47_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE47_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f12svuint64x4_t12svuint64x4_t( [[COERCE46_EXTRACT0]], [[COERCE46_EXTRACT1]], [[COERCE46_EXTRACT2]], [[COERCE46_EXTRACT3]], [[COERCE47_EXTRACT0]], [[COERCE47_EXTRACT1]], [[COERCE47_EXTRACT2]], [[COERCE47_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE48]], align 16 +// COMPAT_17-NEXT: [[COERCE48_TUPLE:%.*]] = load { , }, ptr [[COERCE48]], align 16 +// COMPAT_17-NEXT: [[COERCE48_EXTRACT0:%.*]] = extractvalue { , } [[COERCE48_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE48_EXTRACT1:%.*]] = extractvalue { , } [[COERCE48_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE49]], align 16 +// COMPAT_17-NEXT: [[COERCE49_TUPLE:%.*]] = load { , }, ptr [[COERCE49]], align 16 +// COMPAT_17-NEXT: [[COERCE49_EXTRACT0:%.*]] = extractvalue { , } [[COERCE49_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE49_EXTRACT1:%.*]] = extractvalue { , } [[COERCE49_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f13svfloat16x2_t13svfloat16x2_t( [[COERCE48_EXTRACT0]], [[COERCE48_EXTRACT1]], [[COERCE49_EXTRACT0]], [[COERCE49_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE50]], align 16 +// COMPAT_17-NEXT: [[COERCE50_TUPLE:%.*]] = load { , , }, ptr [[COERCE50]], align 16 +// COMPAT_17-NEXT: [[COERCE50_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE50_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE50_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE50_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE51]], align 16 +// COMPAT_17-NEXT: [[COERCE51_TUPLE:%.*]] = load { , , }, ptr [[COERCE51]], align 16 +// COMPAT_17-NEXT: [[COERCE51_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE51_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE51_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE51_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f13svfloat16x3_t13svfloat16x3_t( [[COERCE50_EXTRACT0]], [[COERCE50_EXTRACT1]], [[COERCE50_EXTRACT2]], [[COERCE51_EXTRACT0]], [[COERCE51_EXTRACT1]], [[COERCE51_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE52]], align 16 +// COMPAT_17-NEXT: [[COERCE52_TUPLE:%.*]] = load { , , , }, ptr [[COERCE52]], align 16 +// COMPAT_17-NEXT: [[COERCE52_EXTRACT0:%.*]] = 
extractvalue { , , , } [[COERCE52_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE52_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE52_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE52_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE52_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE53]], align 16 +// COMPAT_17-NEXT: [[COERCE53_TUPLE:%.*]] = load { , , , }, ptr [[COERCE53]], align 16 +// COMPAT_17-NEXT: [[COERCE53_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE53_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE53_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE53_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE53_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f13svfloat16x4_t13svfloat16x4_t( [[COERCE52_EXTRACT0]], [[COERCE52_EXTRACT1]], [[COERCE52_EXTRACT2]], [[COERCE52_EXTRACT3]], [[COERCE53_EXTRACT0]], [[COERCE53_EXTRACT1]], [[COERCE53_EXTRACT2]], [[COERCE53_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE54]], align 16 +// COMPAT_17-NEXT: [[COERCE54_TUPLE:%.*]] = load { , }, ptr [[COERCE54]], align 16 +// COMPAT_17-NEXT: [[COERCE54_EXTRACT0:%.*]] = extractvalue { , } [[COERCE54_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE54_EXTRACT1:%.*]] = extractvalue { , } [[COERCE54_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE55]], align 16 +// COMPAT_17-NEXT: [[COERCE55_TUPLE:%.*]] = load { , }, ptr [[COERCE55]], align 16 +// COMPAT_17-NEXT: [[COERCE55_EXTRACT0:%.*]] = extractvalue { , } [[COERCE55_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE55_EXTRACT1:%.*]] = extractvalue { , } [[COERCE55_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f13svfloat32x2_t13svfloat32x2_t( [[COERCE54_EXTRACT0]], [[COERCE54_EXTRACT1]], [[COERCE55_EXTRACT0]], [[COERCE55_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE56]], align 16 +// COMPAT_17-NEXT: [[COERCE56_TUPLE:%.*]] = load { , , }, ptr [[COERCE56]], align 16 +// COMPAT_17-NEXT: [[COERCE56_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE56_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE56_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE56_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE57]], align 16 +// COMPAT_17-NEXT: [[COERCE57_TUPLE:%.*]] = load { , , }, ptr [[COERCE57]], align 16 +// COMPAT_17-NEXT: [[COERCE57_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE57_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE57_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE57_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f13svfloat32x3_t13svfloat32x3_t( [[COERCE56_EXTRACT0]], [[COERCE56_EXTRACT1]], [[COERCE56_EXTRACT2]], [[COERCE57_EXTRACT0]], [[COERCE57_EXTRACT1]], [[COERCE57_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE58]], align 16 +// COMPAT_17-NEXT: [[COERCE58_TUPLE:%.*]] = load { , , , }, ptr [[COERCE58]], align 16 +// COMPAT_17-NEXT: [[COERCE58_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE58_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE58_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE58_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE58_EXTRACT3:%.*]] = extractvalue { , , , } 
[[COERCE58_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE59]], align 16 +// COMPAT_17-NEXT: [[COERCE59_TUPLE:%.*]] = load { , , , }, ptr [[COERCE59]], align 16 +// COMPAT_17-NEXT: [[COERCE59_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE59_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE59_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE59_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE59_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f13svfloat32x4_t13svfloat32x4_t( [[COERCE58_EXTRACT0]], [[COERCE58_EXTRACT1]], [[COERCE58_EXTRACT2]], [[COERCE58_EXTRACT3]], [[COERCE59_EXTRACT0]], [[COERCE59_EXTRACT1]], [[COERCE59_EXTRACT2]], [[COERCE59_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE60]], align 16 +// COMPAT_17-NEXT: [[COERCE60_TUPLE:%.*]] = load { , }, ptr [[COERCE60]], align 16 +// COMPAT_17-NEXT: [[COERCE60_EXTRACT0:%.*]] = extractvalue { , } [[COERCE60_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE60_EXTRACT1:%.*]] = extractvalue { , } [[COERCE60_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE61]], align 16 +// COMPAT_17-NEXT: [[COERCE61_TUPLE:%.*]] = load { , }, ptr [[COERCE61]], align 16 +// COMPAT_17-NEXT: [[COERCE61_EXTRACT0:%.*]] = extractvalue { , } [[COERCE61_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE61_EXTRACT1:%.*]] = extractvalue { , } [[COERCE61_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f13svfloat64x2_t13svfloat64x2_t( [[COERCE60_EXTRACT0]], [[COERCE60_EXTRACT1]], [[COERCE61_EXTRACT0]], [[COERCE61_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE62]], align 16 +// COMPAT_17-NEXT: [[COERCE62_TUPLE:%.*]] = load { , , }, ptr [[COERCE62]], align 16 +// COMPAT_17-NEXT: [[COERCE62_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE62_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE62_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE62_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE63]], align 16 +// COMPAT_17-NEXT: [[COERCE63_TUPLE:%.*]] = load { , , }, ptr [[COERCE63]], align 16 +// COMPAT_17-NEXT: [[COERCE63_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE63_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE63_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE63_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f13svfloat64x3_t13svfloat64x3_t( [[COERCE62_EXTRACT0]], [[COERCE62_EXTRACT1]], [[COERCE62_EXTRACT2]], [[COERCE63_EXTRACT0]], [[COERCE63_EXTRACT1]], [[COERCE63_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE64]], align 16 +// COMPAT_17-NEXT: [[COERCE64_TUPLE:%.*]] = load { , , , }, ptr [[COERCE64]], align 16 +// COMPAT_17-NEXT: [[COERCE64_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE64_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE64_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE64_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE64_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE65]], align 16 +// COMPAT_17-NEXT: [[COERCE65_TUPLE:%.*]] = load { , , , }, ptr [[COERCE65]], align 16 +// COMPAT_17-NEXT: [[COERCE65_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 0 +// COMPAT_17-NEXT: 
[[COERCE65_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE65_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE65_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE65_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f13svfloat64x4_t13svfloat64x4_t( [[COERCE64_EXTRACT0]], [[COERCE64_EXTRACT1]], [[COERCE64_EXTRACT2]], [[COERCE64_EXTRACT3]], [[COERCE65_EXTRACT0]], [[COERCE65_EXTRACT1]], [[COERCE65_EXTRACT2]], [[COERCE65_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE66]], align 16 +// COMPAT_17-NEXT: [[COERCE66_TUPLE:%.*]] = load { , }, ptr [[COERCE66]], align 16 +// COMPAT_17-NEXT: [[COERCE66_EXTRACT0:%.*]] = extractvalue { , } [[COERCE66_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE66_EXTRACT1:%.*]] = extractvalue { , } [[COERCE66_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE67]], align 16 +// COMPAT_17-NEXT: [[COERCE67_TUPLE:%.*]] = load { , }, ptr [[COERCE67]], align 16 +// COMPAT_17-NEXT: [[COERCE67_EXTRACT0:%.*]] = extractvalue { , } [[COERCE67_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE67_EXTRACT1:%.*]] = extractvalue { , } [[COERCE67_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f14svbfloat16x2_t14svbfloat16x2_t( [[COERCE66_EXTRACT0]], [[COERCE66_EXTRACT1]], [[COERCE67_EXTRACT0]], [[COERCE67_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE68]], align 16 +// COMPAT_17-NEXT: [[COERCE68_TUPLE:%.*]] = load { , , }, ptr [[COERCE68]], align 16 +// COMPAT_17-NEXT: [[COERCE68_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE68_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE68_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE68_TUPLE]], 2 +// COMPAT_17-NEXT: store { , , } zeroinitializer, ptr [[COERCE69]], align 16 +// COMPAT_17-NEXT: [[COERCE69_TUPLE:%.*]] = load { , , }, ptr [[COERCE69]], align 16 +// COMPAT_17-NEXT: [[COERCE69_EXTRACT0:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE69_EXTRACT1:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE69_EXTRACT2:%.*]] = extractvalue { , , } [[COERCE69_TUPLE]], 2 +// COMPAT_17-NEXT: call void @_Z1f14svbfloat16x3_t14svbfloat16x3_t( [[COERCE68_EXTRACT0]], [[COERCE68_EXTRACT1]], [[COERCE68_EXTRACT2]], [[COERCE69_EXTRACT0]], [[COERCE69_EXTRACT1]], [[COERCE69_EXTRACT2]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE70]], align 16 +// COMPAT_17-NEXT: [[COERCE70_TUPLE:%.*]] = load { , , , }, ptr [[COERCE70]], align 16 +// COMPAT_17-NEXT: [[COERCE70_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE70_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE70_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE70_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE70_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE71]], align 16 +// COMPAT_17-NEXT: [[COERCE71_TUPLE:%.*]] = load { , , , }, ptr [[COERCE71]], align 16 +// COMPAT_17-NEXT: [[COERCE71_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE71_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE71_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE71_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE71_TUPLE]], 3 +// COMPAT_17-NEXT: call void 
@_Z1f14svbfloat16x4_t14svbfloat16x4_t( [[COERCE70_EXTRACT0]], [[COERCE70_EXTRACT1]], [[COERCE70_EXTRACT2]], [[COERCE70_EXTRACT3]], [[COERCE71_EXTRACT0]], [[COERCE71_EXTRACT1]], [[COERCE71_EXTRACT2]], [[COERCE71_EXTRACT3]]) +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE72]], align 2 +// COMPAT_17-NEXT: [[COERCE72_TUPLE:%.*]] = load { , }, ptr [[COERCE72]], align 2 +// COMPAT_17-NEXT: [[COERCE72_EXTRACT0:%.*]] = extractvalue { , } [[COERCE72_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE72_EXTRACT1:%.*]] = extractvalue { , } [[COERCE72_TUPLE]], 1 +// COMPAT_17-NEXT: store { , } zeroinitializer, ptr [[COERCE73]], align 2 +// COMPAT_17-NEXT: [[COERCE73_TUPLE:%.*]] = load { , }, ptr [[COERCE73]], align 2 +// COMPAT_17-NEXT: [[COERCE73_EXTRACT0:%.*]] = extractvalue { , } [[COERCE73_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE73_EXTRACT1:%.*]] = extractvalue { , } [[COERCE73_TUPLE]], 1 +// COMPAT_17-NEXT: call void @_Z1f10svboolx2_t10svboolx2_t( [[COERCE72_EXTRACT0]], [[COERCE72_EXTRACT1]], [[COERCE73_EXTRACT0]], [[COERCE73_EXTRACT1]]) +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE74]], align 2 +// COMPAT_17-NEXT: [[COERCE74_TUPLE:%.*]] = load { , , , }, ptr [[COERCE74]], align 2 +// COMPAT_17-NEXT: [[COERCE74_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE74_TUPLE]], 3 +// COMPAT_17-NEXT: store { , , , } zeroinitializer, ptr [[COERCE75]], align 2 +// COMPAT_17-NEXT: [[COERCE75_TUPLE:%.*]] = load { , , , }, ptr [[COERCE75]], align 2 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 0 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 1 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 2 +// COMPAT_17-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { , , , } [[COERCE75_TUPLE]], 3 +// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t( [[COERCE74_EXTRACT0]], [[COERCE74_EXTRACT1]], [[COERCE74_EXTRACT2]], [[COERCE74_EXTRACT3]], [[COERCE75_EXTRACT0]], [[COERCE75_EXTRACT1]], [[COERCE75_EXTRACT2]], [[COERCE75_EXTRACT3]]) // COMPAT_17-NEXT: ret void // void foo() { diff --git a/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp b/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp index 1a152c1741c177..503d77a1822a4b 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-vector-init.cpp @@ -16,45 +16,45 @@ // CHECK-NEXT: [[F32:%.*]] = alloca , align 16 // CHECK-NEXT: [[F64:%.*]] = alloca , align 16 // CHECK-NEXT: [[BF16:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S8X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S16X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S32X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[X64X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U8X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U16X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U32X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U64X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F16X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F32X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F64X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[BF16X2:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S8X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S16X3:%.*]] = alloca , align 16 -// 
CHECK-NEXT: [[S32X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[X64X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U8X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U16X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U32X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U64X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F16X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F32X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F64X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[BF16X3:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S8X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S16X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[S32X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[X64X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U8X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U16X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U32X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[U64X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F16X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F32X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[F64X4:%.*]] = alloca , align 16 -// CHECK-NEXT: [[BF16X4:%.*]] = alloca , align 16 +// CHECK-NEXT: [[S8X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[S16X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[S32X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[X64X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[U8X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[U16X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[U32X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[U64X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[F16X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[F32X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[F64X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[BF16X2:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[S8X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[S16X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[S32X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[X64X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[U8X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[U16X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[U32X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[U64X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[F16X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[F32X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[F64X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[BF16X3:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[S8X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[S16X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[S32X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[X64X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[U8X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[U16X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[U32X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[U64X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[F16X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[F32X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[F64X4:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[BF16X4:%.*]] = alloca { , , , }, align 16 // CHECK-NEXT: [[B8:%.*]] = alloca , align 2 -// CHECK-NEXT: [[B8X2:%.*]] = alloca , align 2 -// CHECK-NEXT: [[B8X4:%.*]] = alloca , align 2 +// CHECK-NEXT: [[B8X2:%.*]] = alloca { , }, align 2 +// CHECK-NEXT: [[B8X4:%.*]] = alloca { , , , }, align 2 // CHECK-NEXT: [[CNT:%.*]] = alloca target("aarch64.svcount"), align 2 // CHECK-NEXT: store zeroinitializer, ptr [[S8]], align 16 // CHECK-NEXT: store 
zeroinitializer, ptr [[S16]], align 16 @@ -68,45 +68,45 @@ // CHECK-NEXT: store zeroinitializer, ptr [[F32]], align 16 // CHECK-NEXT: store zeroinitializer, ptr [[F64]], align 16 // CHECK-NEXT: store zeroinitializer, ptr [[BF16]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S8X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S16X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S32X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[X64X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U8X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U16X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U32X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U64X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F16X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F32X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F64X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[BF16X2]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S8X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S16X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S32X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[X64X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U8X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U16X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U32X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U64X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F16X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F32X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F64X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[BF16X3]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S8X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S16X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[S32X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[X64X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U8X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U16X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U32X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[U64X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F16X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F32X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[F64X4]], align 16 -// CHECK-NEXT: store zeroinitializer, ptr [[BF16X4]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[S8X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[S16X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[S32X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[X64X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[U8X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[U16X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[U32X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[U64X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[F16X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[F32X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[F64X2]], align 16 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[BF16X2]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[S8X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[S16X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[S32X3]], align 16 +// 
CHECK-NEXT: store { , , } zeroinitializer, ptr [[X64X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[U8X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[U16X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[U32X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[U64X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[F16X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[F32X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[F64X3]], align 16 +// CHECK-NEXT: store { , , } zeroinitializer, ptr [[BF16X3]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[S8X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[S16X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[S32X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[X64X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[U8X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[U16X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[U32X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[U64X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[F16X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[F32X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[F64X4]], align 16 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[BF16X4]], align 16 // CHECK-NEXT: store zeroinitializer, ptr [[B8]], align 2 -// CHECK-NEXT: store zeroinitializer, ptr [[B8X2]], align 2 -// CHECK-NEXT: store zeroinitializer, ptr [[B8X4]], align 2 +// CHECK-NEXT: store { , } zeroinitializer, ptr [[B8X2]], align 2 +// CHECK-NEXT: store { , , , } zeroinitializer, ptr [[B8X4]], align 2 // CHECK-NEXT: store target("aarch64.svcount") zeroinitializer, ptr [[CNT]], align 2 // CHECK-NEXT: ret void // @@ -339,13 +339,18 @@ void test_copy_bf16(__SVBfloat16_t a) { } // CHECK-LABEL: define dso_local void @_Z14test_copy_s8x210svint8x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s8x2(__clang_svint8x2_t a) { @@ -353,13 +358,18 @@ void test_copy_s8x2(__clang_svint8x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s16x211svint16x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store 
[[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s16x2(__clang_svint16x2_t a) { @@ -367,13 +377,18 @@ void test_copy_s16x2(__clang_svint16x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s32x211svint32x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s32x2(__clang_svint32x2_t a) { @@ -381,13 +396,18 @@ void test_copy_s32x2(__clang_svint32x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s64x211svint64x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s64x2(__clang_svint64x2_t a) { @@ -395,13 +415,18 @@ void test_copy_s64x2(__clang_svint64x2_t a) { } // CHECK-LABEL: define dso_local void @_Z14test_copy_u8x211svuint8x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // 
CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u8x2(__clang_svuint8x2_t a) { @@ -409,13 +434,18 @@ void test_copy_u8x2(__clang_svuint8x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u16x212svuint16x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u16x2(__clang_svuint16x2_t a) { @@ -423,13 +453,18 @@ void test_copy_u16x2(__clang_svuint16x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u32x212svuint32x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u32x2(__clang_svuint32x2_t a) { @@ -437,13 +472,18 @@ void test_copy_u32x2(__clang_svuint32x2_t a) { } // CHECK-LABEL: define dso_local void 
@_Z15test_copy_u64x212svuint64x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u64x2(__clang_svuint64x2_t a) { @@ -451,13 +491,18 @@ void test_copy_u64x2(__clang_svuint64x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x213svfloat16x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f16x2(__clang_svfloat16x2_t a) { @@ -465,13 +510,18 @@ void test_copy_f16x2(__clang_svfloat16x2_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f32x213svfloat32x2_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: store { , } [[TMP1]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load { , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , } [[TMP2]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void 
test_copy_f32x2(__clang_svfloat32x2_t a) {
@@ -479,13 +529,18 @@ void test_copy_f32x2(__clang_svfloat32x2_t a) {
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_f64x213svfloat64x2_t
-// CHECK-SAME: (<vscale x 4 x double> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 2 x double> [[A_COERCE0:%.*]], <vscale x 2 x double> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 4 x double>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 4 x double>, align 16
-// CHECK-NEXT:    store <vscale x 4 x double> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x double>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 4 x double> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double> } poison, <vscale x 2 x double> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], <vscale x 2 x double> [[A_COERCE1]], 1
+// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 2 x double>, <vscale x 2 x double> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_f64x2(__clang_svfloat64x2_t a) {
@@ -493,13 +548,18 @@ void test_copy_f64x2(__clang_svfloat64x2_t a) {
}

// CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x214svbfloat16x2_t
-// CHECK-SAME: (<vscale x 16 x bfloat> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 8 x bfloat> [[A_COERCE0:%.*]], <vscale x 8 x bfloat> [[A_COERCE1:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 16 x bfloat>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 16 x bfloat>, align 16
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x bfloat>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 16 x bfloat> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[A_COERCE1]], 1
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_bf16x2(__clang_svbfloat16x2_t a) {
@@ -507,13 +567,19 @@ void test_copy_bf16x2(__clang_svbfloat16x2_t a) {
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_s8x310svint8x3_t
-// CHECK-SAME: (<vscale x 48 x i8> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 48 x i8>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 48 x i8>, align 16
-// CHECK-NEXT:    store <vscale x 48 x i8> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 48 x i8>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 48 x i8> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s8x3(__clang_svint8x3_t a) {
@@ -521,13 +587,19 @@ void test_copy_s8x3(__clang_svint8x3_t a) {
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s16x311svint16x3_t
-// CHECK-SAME: (<vscale x 24 x i16> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 24 x i16>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 24 x i16>, align 16
-// CHECK-NEXT:    store <vscale x 24 x i16> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 24 x i16>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 24 x i16> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s16x3(__clang_svint16x3_t a) {
@@ -535,13 +607,19 @@ void test_copy_s16x3(__clang_svint16x3_t a) {
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s32x311svint32x3_t
-// CHECK-SAME: (<vscale x 12 x i32> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 4 x i32> [[A_COERCE0:%.*]], <vscale x 4 x i32> [[A_COERCE1:%.*]], <vscale x 4 x i32> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 12 x i32>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 12 x i32>, align 16
-// CHECK-NEXT:    store <vscale x 12 x i32> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 12 x i32>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 12 x i32> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } poison, <vscale x 4 x i32> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], <vscale x 4 x i32> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], <vscale x 4 x i32> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s32x3(__clang_svint32x3_t a) {
@@ -549,13 +627,19 @@ void test_copy_s32x3(__clang_svint32x3_t a) {
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_s64x311svint64x3_t
-// CHECK-SAME: (<vscale x 6 x i64> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 2 x i64> [[A_COERCE0:%.*]], <vscale x 2 x i64> [[A_COERCE1:%.*]], <vscale x 2 x i64> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 6 x i64>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 6 x i64>, align 16
-// CHECK-NEXT:    store <vscale x 6 x i64> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 6 x i64>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 6 x i64> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } poison, <vscale x 2 x i64> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], <vscale x 2 x i64> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], <vscale x 2 x i64> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_s64x3(__clang_svint64x3_t a) {
@@ -563,13 +647,19 @@ void test_copy_s64x3(__clang_svint64x3_t a) {
}

// CHECK-LABEL: define dso_local void @_Z14test_copy_u8x311svuint8x3_t
-// CHECK-SAME: (<vscale x 48 x i8> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 16 x i8> [[A_COERCE0:%.*]], <vscale x 16 x i8> [[A_COERCE1:%.*]], <vscale x 16 x i8> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 48 x i8>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 48 x i8>, align 16
-// CHECK-NEXT:    store <vscale x 48 x i8> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 48 x i8>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 48 x i8> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } poison, <vscale x 16 x i8> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], <vscale x 16 x i8> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], <vscale x 16 x i8> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u8x3(__clang_svuint8x3_t a) {
@@ -577,13 +667,19 @@ void test_copy_u8x3(__clang_svuint8x3_t a) {
}

// CHECK-LABEL: define dso_local void @_Z15test_copy_u16x312svuint16x3_t
-// CHECK-SAME: (<vscale x 24 x i16> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 8 x i16> [[A_COERCE0:%.*]], <vscale x 8 x i16> [[A_COERCE1:%.*]], <vscale x 8 x i16> [[A_COERCE2:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 24 x i16>, align 16
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 24 x i16>, align 16
-// CHECK-NEXT:    store <vscale x 24 x i16> [[A]], ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 24 x i16>, ptr [[A_ADDR]], align 16
-// CHECK-NEXT:    store <vscale x 24 x i16> [[TMP0]], ptr [[B]], align 16
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } poison, <vscale x 8 x i16> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], <vscale x 8 x i16> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], <vscale x 8 x i16> [[A_COERCE2]], 2
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP2]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP3]], ptr [[B]], align 16
// CHECK-NEXT:    ret void
//
void test_copy_u16x3(__clang_svuint16x3_t a) {
@@ -591,13 +687,19 @@ void test_copy_u16x3(__clang_svuint16x3_t a) {
}
// CHECK-LABEL: define dso_local void @_Z15test_copy_u32x312svuint32x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u32x3(__clang_svuint32x3_t a) { @@ -605,13 +707,19 @@ void test_copy_u32x3(__clang_svuint32x3_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u64x312svuint64x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u64x3(__clang_svuint64x3_t a) { @@ -619,13 +727,19 @@ void test_copy_u64x3(__clang_svuint64x3_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x313svfloat16x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , 
} [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f16x3(__clang_svfloat16x3_t a) { @@ -633,13 +747,19 @@ void test_copy_f16x3(__clang_svfloat16x3_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f32x313svfloat32x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f32x3(__clang_svfloat32x3_t a) { @@ -647,13 +767,19 @@ void test_copy_f32x3(__clang_svfloat32x3_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f64x313svfloat64x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f64x3(__clang_svfloat64x3_t a) { @@ -661,13 +787,19 @@ void test_copy_f64x3(__clang_svfloat64x3_t a) { } // CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x314svbfloat16x3_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr 
[[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: store { , , } [[TMP2]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load { , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , } [[TMP3]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_bf16x3(__clang_svbfloat16x3_t a) { @@ -675,13 +807,20 @@ void test_copy_bf16x3(__clang_svbfloat16x3_t a) { } // CHECK-LABEL: define dso_local void @_Z14test_copy_s8x410svint8x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s8x4(__clang_svint8x4_t a) { @@ -689,13 +828,20 @@ void test_copy_s8x4(__clang_svint8x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s16x411svint16x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { 
, , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s16x4(__clang_svint16x4_t a) { @@ -703,13 +849,20 @@ void test_copy_s16x4(__clang_svint16x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s32x411svint32x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s32x4(__clang_svint32x4_t a) { @@ -717,13 +870,20 @@ void test_copy_s32x4(__clang_svint32x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_s64x411svint64x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_s64x4(__clang_svint64x4_t a) { @@ -731,13 +891,20 @@ void test_copy_s64x4(__clang_svint64x4_t a) { } // CHECK-LABEL: define dso_local void @_Z14test_copy_u8x411svuint8x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u8x4(__clang_svuint8x4_t a) { @@ -745,13 +912,20 @@ void test_copy_u8x4(__clang_svuint8x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u16x412svuint16x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u16x4(__clang_svuint16x4_t a) { @@ -759,13 +933,20 @@ void test_copy_u16x4(__clang_svuint16x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u32x412svuint32x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = 
insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u32x4(__clang_svuint32x4_t a) { @@ -773,13 +954,20 @@ void test_copy_u32x4(__clang_svuint32x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_u64x412svuint64x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_u64x4(__clang_svuint64x4_t a) { @@ -787,13 +975,20 @@ void test_copy_u64x4(__clang_svuint64x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f16x413svfloat16x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f16x4(__clang_svfloat16x4_t a) { @@ -801,13 +996,20 @@ void test_copy_f16x4(__clang_svfloat16x4_t a) { } // 
CHECK-LABEL: define dso_local void @_Z15test_copy_f32x413svfloat32x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f32x4(__clang_svfloat32x4_t a) { @@ -815,13 +1017,20 @@ void test_copy_f32x4(__clang_svfloat32x4_t a) { } // CHECK-LABEL: define dso_local void @_Z15test_copy_f64x413svfloat64x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[B:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , , , } poison, [[A_COERCE0]], 0 +// CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , , , } [[TMP0]], [[A_COERCE1]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue { , , , } [[TMP1]], [[A_COERCE2]], 2 +// CHECK-NEXT: [[TMP3:%.*]] = insertvalue { , , , } [[TMP2]], [[A_COERCE3]], 3 +// CHECK-NEXT: store { , , , } [[TMP3]], ptr [[A]], align 16 +// CHECK-NEXT: [[A1:%.*]] = load { , , , }, ptr [[A]], align 16 +// CHECK-NEXT: store { , , , } [[A1]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load { , , , }, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store { , , , } [[TMP4]], ptr [[B]], align 16 // CHECK-NEXT: ret void // void test_copy_f64x4(__clang_svfloat64x4_t a) { @@ -829,13 +1038,20 @@ void test_copy_f64x4(__clang_svfloat64x4_t a) { } // CHECK-LABEL: define dso_local void @_Z16test_copy_bf16x414svbfloat16x4_t -// CHECK-SAME: ( [[A:%.*]]) #[[ATTR0]] { +// CHECK-SAME: ( [[A_COERCE0:%.*]], [[A_COERCE1:%.*]], [[A_COERCE2:%.*]], [[A_COERCE3:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca , align 16 -// CHECK-NEXT: [[B:%.*]] = alloca , align 16 -// CHECK-NEXT: store [[A]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store [[TMP0]], ptr [[B]], align 16 +// CHECK-NEXT: [[A:%.*]] = alloca { , , , }, align 16 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca { , 
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } poison, <vscale x 8 x bfloat> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]], <vscale x 8 x bfloat> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP1]], <vscale x 8 x bfloat> [[A_COERCE2]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], <vscale x 8 x bfloat> [[A_COERCE3]], 3
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP3]], ptr [[A]], align 16
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[A1]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], ptr [[B]], align 16
 // CHECK-NEXT:    ret void
 //
 void test_copy_bf16x4(__clang_svbfloat16x4_t a) {
@@ -857,13 +1073,18 @@ void test_copy_b8(__SVBool_t a) {
 }
 
 // CHECK-LABEL: define dso_local void @_Z14test_copy_b8x210svboolx2_t
-// CHECK-SAME: (<vscale x 32 x i1> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 16 x i1> [[A_COERCE0:%.*]], <vscale x 16 x i1> [[A_COERCE1:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 32 x i1>, align 2
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 32 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 32 x i1> [[A]], ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 32 x i1>, ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    store <vscale x 32 x i1> [[TMP0]], ptr [[B]], align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[A_COERCE1]], 1
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], ptr [[A]], align 2
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[A1]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], ptr [[B]], align 2
 // CHECK-NEXT:    ret void
 //
 void test_copy_b8x2(__clang_svboolx2_t a) {
@@ -871,13 +1092,20 @@ void test_copy_b8x2(__clang_svboolx2_t a) {
 }
 
 // CHECK-LABEL: define dso_local void @_Z14test_copy_b8x410svboolx4_t
-// CHECK-SAME: (<vscale x 64 x i1> [[A:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<vscale x 16 x i1> [[A_COERCE0:%.*]], <vscale x 16 x i1> [[A_COERCE1:%.*]], <vscale x 16 x i1> [[A_COERCE2:%.*]], <vscale x 16 x i1> [[A_COERCE3:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <vscale x 64 x i1>, align 2
-// CHECK-NEXT:    [[B:%.*]] = alloca <vscale x 64 x i1>, align 2
-// CHECK-NEXT:    store <vscale x 64 x i1> [[A]], ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 64 x i1>, ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    store <vscale x 64 x i1> [[TMP0]], ptr [[B]], align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[B:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } poison, <vscale x 16 x i1> [[A_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP0]], <vscale x 16 x i1> [[A_COERCE1]], 1
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP1]], <vscale x 16 x i1> [[A_COERCE2]], 2
+// CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP2]], <vscale x 16 x i1> [[A_COERCE3]], 3
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP3]], ptr [[A]], align 2
+// CHECK-NEXT:    [[A1:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[A1]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP4:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[TMP4]], ptr [[B]], align 2
 // CHECK-NEXT:    ret void
 //
 void test_copy_b8x4(__clang_svboolx4_t a) {
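(Note on the elided bodies: every hunk above stops at the opening `void test_copy_*(...) {` line and resumes at the closing `}`, so the one-line body itself never appears in this patch view. Judging from the IR being checked, a [[B]] alloca plus a load from [[A_ADDR]] stored to [[B]], each body is presumably a plain tuple copy along the lines of the hypothetical sketch below; the local name `b` is inferred from the [[B]] alloca, not taken from the test source:

void test_copy_b8x4(__clang_svboolx4_t a) {
  __clang_svboolx4_t b{a};  // hypothetical body: copy-initialise a local tuple from the parameter
}

If so, the updated expectations change only the tuple's representation, passed as one scalable vector per element and reassembled with insertvalue into struct-typed allocas rather than as one wide concatenated vector, while the C++ under test stays the same.)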