From 8c9c9bd3e590dc02ee82d0442df5b94e1beed06d Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Wed, 11 Oct 2023 13:04:28 +0000
Subject: [PATCH 1/7] [Clang][SME2] Add multi-vector add/sub builtins

Adds the following SME2 builtins:
 - sv(add|sub)
 - sv(add|sub)_za32/za64
 - sv(add|sub)_write_za32/za64
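For illustration, a minimal sketch of how the new builtins are called (the
function names below are invented for this example, and streaming-mode
attributes are omitted, as in the tests added by this patch; the SME ACLE
header is assumed to be included):

  // ZA form: accumulates both vectors of zn into the ZA slices selected
  // by slice_base (vertical group x2).
  void add_to_za(uint32_t slice_base, svint32x2_t zn) {
    svadd_za32_s32_vg1x2(slice_base, zn);
  }

  // Non-ZA form (declared in arm_sve.td): adds zm to each vector of zn.
  svint32x2_t add_tuple(svint32x2_t zn, svint32_t zm) {
    return svadd_single_s32_x2(zn, zm);
  }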
"_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>; + + def NAME # _WRITE_ZA64_VG1x2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>; + def NAME # _WRITE_ZA64_VG1x4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>; + + def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>; + def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>; + } + + let TargetGuard = "sme-f64f64" in { + def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>; + def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>; + } + } +} + +defm SVADD : ZAAddSub<"add">; +defm SVSUB : ZAAddSub<"sub">; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b5baafedd13960..53d5d32807ce39 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1980,3 +1980,12 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; } + +//////////////////////////////////////////////////////////////////////////////// +// SME2 + +let TargetGuard = "sme2" in { +// == ADD (vectors) == + def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; + def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; +} diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 3a7a5b51b25801..22a2a3c5434d65 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -257,7 +257,7 @@ class ImmCheck { } class Inst ft, list ch, MemEltType met> { + list ft, list ch, MemEltType met = MemEltTyDefault> { string Name = n; string Prototype = p; string Types = t; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ee2c4b1e10afba..54cb0a01fd8b69 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4297,6 +4297,11 @@ class CodeGenFunction : public CodeGenTypeCache { /// the wider vector. This avoids the error when allocating space in llvm /// for struct of scalable vectors if a function returns struct. 
---
 clang/include/clang/Basic/arm_sme.td          |   35 +
 clang/include/clang/Basic/arm_sve.td          |    9 +
 clang/include/clang/Basic/arm_sve_sme_incl.td |    2 +-
 clang/lib/CodeGen/CodeGenFunction.h           |    5 +
 .../aarch64-sme2-intrinsics/acle_sme2_add.c   | 1226 +++++++++++++++++
 .../aarch64-sme2-intrinsics/acle_sme2_sub.c   |  418 ++++++
 clang/utils/TableGen/SveEmitter.cpp           |  105 +-
 7 files changed, 1737 insertions(+), 63 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 8d85327a86b1aa..b5655afdf419ec 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -263,3 +263,38 @@ multiclass ZAFPOuterProd<string n_suffix> {
 
 defm SVMOPA : ZAFPOuterProd<"mopa">;
 defm SVMOPS : ZAFPOuterProd<"mops">;
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2 - ADD, SUB
+
+multiclass ZAAddSub<string n_suffix> {
+  let TargetGuard = "sme2" in {
+    def NAME # _WRITE_SINGLE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x2", "vm2d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _WRITE_SINGLE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x4", "vm4d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    def NAME # _WRITE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x2", "vm22", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _WRITE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x4", "vm44", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    def NAME # _ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x2", "vm2", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
+    def NAME # _ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x4", "vm4", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+    let TargetGuard = "sme-i16i64" in {
+      def NAME # _WRITE_SINGLE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x2", "vm2d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _WRITE_SINGLE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+      def NAME # _WRITE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _WRITE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
+
+      def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
+    }
+
+    let TargetGuard = "sme-f64f64" in {
+      def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
+      def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
+    }
+  }
+}
+
+defm SVADD : ZAAddSub<"add">;
+defm SVSUB : ZAAddSub<"sub">;
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b5baafedd13960..53d5d32807ce39 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1980,3 +1980,12 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
 }
+
+////////////////////////////////////////////////////////////////////////////////
+// SME2
+
+let TargetGuard = "sme2" in {
+// == ADD (vectors) ==
+  def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
+  def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
+}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 3a7a5b51b25801..22a2a3c5434d65 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -257,7 +257,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
 }
 
 class Inst<string n, string p, string t, MergeType mt, string i,
-           list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
+           list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
   string Name = n;
   string Prototype = p;
   string Types = t;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ee2c4b1e10afba..54cb0a01fd8b69 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4297,6 +4297,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// the wider vector. This avoids the error when allocating space in llvm
   /// for struct of scalable vectors if a function returns struct.
   llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
+
+  void GetAArch64SMEProcessedOperands(unsigned BuiltinID, const CallExpr *E,
+                                      SmallVectorImpl<llvm::Value *> &Ops,
+                                      SVETypeFlags TypeFlags);
+
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
new file mode 100644
index 00000000000000..18d7c66896d8b8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
@@ -0,0 +1,1226 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
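+// For example, SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2) expands to
+// the overloaded name svadd_write_za32_vg1x2 below, and to the full name
+// svadd_write_single_za32_s32_vg1x2 when SVE_OVERLOADED_FORMS is not defined.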
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Single-Multi +// + +// x2 +// CHECK-LABEL: @test_svadd_write_single2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s32j11svint32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u32j12svuint32x2_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s64j11svint64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// 
CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u64j12svuint64x2_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm); +} + +// x4 + +// CHECK-LABEL: @test_svadd_write_single4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s32j11svint32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// 
CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u32j12svuint32x4_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s64j11svint64x4_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_single4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u64j12svuint64x4_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) { + SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); +} + +// +// Multi-Multi +// + +// x2 + +// CHECK-LABEL: @test_svadd_write_multi2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s32j11svint32x2_t11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u32j12svuint32x2_t12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], 
i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s64j11svint64x2_t11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u64j12svuint64x2_t12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) { + 
SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm); +} + +// x4 + +// CHECK-LABEL: @test_svadd_write_multi4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s32j11svint32x4_t11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], 
[[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u32j12svuint32x4_t12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s64j11svint64x4_t11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svadd_write_multi4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u64j12svuint64x4_t12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { + SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); +} + +// +// Multi-Single Vector +// + +// x2 + +// CHECK-LABEL: @test_svadd_vector_single2_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret 
[[TMP6]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); +} + + +// x4 + +// CHECK-LABEL: @test_svadd_vector_single4_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = 
extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) { + return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], 
[[TMP7]], i64 2) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) { + return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); +} + +// +// Accumulate to ZA +// + +// x2 + +// CHECK-LABEL: @test_svadd_za32_vg1x2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_f32j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za32_vg1x2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_s32j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za32_vg1x2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_u32j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_f64j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { + SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_s64j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { + SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_u64j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { + 
SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x2)(slice_base + 6, zn); +} + +// x4 + +// CHECK-LABEL: @test_svadd_za32_vg1x4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_f32j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za32_vg1x4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_s32j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za32_vg1x4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// 
CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_u32j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) { + SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_f64j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) { + SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_s64j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) { + SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svadd_za64_vg1x4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_u64j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) { + SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x4)(slice_base + 6, zn); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c new file mode 100644 index 00000000000000..2cc5c9cebbff03 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -0,0 +1,418 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+//
+// Single-Multi
+//
+
+// x2
+// CHECK-LABEL: @test_svsub_write_single2_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u32j12svuint32x2_tu12__SVUint32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) {
+  SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm);
+}
+
+// CHECK-LABEL: @test_svsub_write_single2_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u64j12svuint64x2_tu12__SVUint64_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) {
+  SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm);
+}
+
+// x4
+
+// CHECK-LABEL: @test_svsub_write_single4_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 
12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u32j12svuint32x4_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) { + SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svsub_write_single4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u64j12svuint64x4_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) { + SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); +} + +// +// Multi-Multi +// + +// x2 + +// CHECK-LABEL: @test_svsub_write_multi2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u32j12svuint32x2_t12svuint32x2_t( +// CPP-CHECK-NEXT: 
entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) { + SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svsub_write_multi2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u64j12svuint64x2_t12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) { + SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm); +} + +// x4 + +// CHECK-LABEL: @test_svsub_write_multi4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z27test_svsub_write_multi4_u32j12svuint32x4_t12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) { + SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); +} + +// CHECK-LABEL: @test_svsub_write_multi4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u64j12svuint64x4_t12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], 
[[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { + SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); +} + +// +// Accumulate to ZA +// + +// x2 + +// CHECK-LABEL: @test_svsub_za32_vg1x2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_f32j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { + SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svsub_za32_vg1x2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_u32j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { + SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x2)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svsub_za64_vg1x2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_f64j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { + SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x2)(slice_base + 6, zn); +} + + +// 
CHECK-LABEL: @test_svsub_za64_vg1x2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_u64j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { + SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x2)(slice_base + 6, zn); +} + +// x4 + +// CHECK-LABEL: @test_svsub_za32_vg1x4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_f32j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { + SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svsub_za32_vg1x4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_u32j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], 
i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) { + SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svsub_za64_vg1x4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_f64j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) { + SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x4)(slice_base + 6, zn); +} + +// CHECK-LABEL: @test_svsub_za64_vg1x4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_u64j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za64_vg1x4_u64(uint32_t 
slice_base, svuint64x4_t zn) { + SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x4)(slice_base + 6, zn); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 368908e79bf196..ff1fac1bc2a379 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -43,6 +43,8 @@ enum ClassKind { ClassG, // Overloaded name without type suffix }; +enum class ACLEKind { SVE, SME }; + using TypeSpec = std::string; namespace { @@ -246,7 +248,7 @@ class Intrinsic { } /// Emits the intrinsic declaration to the ostream. - void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const; + void emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const; private: std::string getMergeSuffix() const { return MergeSuffix; } @@ -354,6 +356,9 @@ class SVEEmitter { /// Emit arm_sve.h. void createHeader(raw_ostream &o); + // Emits core intrinsics in both arm_sme.h and arm_sve.h + void createCoreHeaderIntrinsics(raw_ostream &o, ACLEKind Kind); + /// Emit all the __builtin prototypes and code needed by Sema. void createBuiltins(raw_ostream &o); @@ -1023,29 +1028,24 @@ std::string Intrinsic::mangleName(ClassKind LocalCK) const { getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); std::string SMEAttrs = ""; - if (Flags & Emitter.getEnumValueForFlag("IsStreaming")) - SMEAttrs += ", arm_streaming"; - if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible")) - SMEAttrs += ", arm_streaming_compatible"; - if (Flags & Emitter.getEnumValueForFlag("IsSharedZA")) - SMEAttrs += ", arm_shared_za"; - if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA")) - SMEAttrs += ", arm_preserves_za"; - OS << (IsOverloaded ? "__aio " : "__ai ") - << "__attribute__((__clang_arm_builtin_alias(" - << (SMEAttrs.empty() ? "__builtin_sve_" : "__builtin_sme_") - << FullName << ")"; - if (!SMEAttrs.empty()) - OS << SMEAttrs; - OS << "))\n"; + << "__attribute__((__clang_arm_builtin_alias("; + + switch (Kind) { + case ACLEKind::SME: + OS << "__builtin_sme_" << FullName << ")))\n"; + break; + case ACLEKind::SVE: + OS << "__builtin_sve_" << FullName << ")))\n"; + break; + } OS << getTypes()[0].str() << " " << ProtoName << "("; for (unsigned I = 0; I < getTypes().size() - 1; ++I) { @@ -1180,6 +1180,30 @@ void SVEEmitter::createIntrinsic( } } +void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, ACLEKind Kind) { + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + // Sort intrinsics in header file by following order/priority: + // - Architectural guard (i.e. does it require SVE2 or SVE2_AES) + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort( + Defs.begin(), Defs.end(), [](const std::unique_ptr &A, + const std::unique_ptr &B) { + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); + }; + return ToTuple(A) < ToTuple(B); + }); + + // Actually emit the intrinsic declarations. 
+ for (auto &I : Defs) + I->emitIntrinsic(OS, Kind); +} + void SVEEmitter::createHeader(raw_ostream &OS) { OS << "/*===---- arm_sve.h - ARM SVE intrinsics " "-----------------------------------===\n" @@ -1331,27 +1355,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) { << To.Suffix << "(__VA_ARGS__)\n"; } - SmallVector<std::unique_ptr<Intrinsic>, 128> Defs; - std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) - createIntrinsic(R, Defs); - - // Sort intrinsics in header file by following order/priority: - // - Architectural guard (i.e. does it require SVE2 or SVE2_AES) - // - Class (is intrinsic overloaded or not) - // - Intrinsic name - std::stable_sort( - Defs.begin(), Defs.end(), [](const std::unique_ptr<Intrinsic> &A, - const std::unique_ptr<Intrinsic> &B) { - auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) { - return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); - }; - return ToTuple(A) < ToTuple(B); - }); - - // Actually emit the intrinsic declarations. - for (auto &I : Defs) - I->emitIntrinsic(OS, *this); + createCoreHeaderIntrinsics(OS, ACLEKind::SVE); OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n"; OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n"; @@ -1533,30 +1537,7 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "extern \"C\" {\n"; OS << "#endif\n\n"; - SmallVector<std::unique_ptr<Intrinsic>, 128> Defs; - std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) - createIntrinsic(R, Defs); - - // Sort intrinsics in header file by following order/priority similar to SVE: - // - Architectural guard - // - Class (is intrinsic overloaded or not) - // - Intrinsic name - std::stable_sort(Defs.begin(), Defs.end(), - [](const std::unique_ptr<Intrinsic> &A, - const std::unique_ptr<Intrinsic> &B) { - auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) { - return std::make_tuple(I->getGuard(), - (unsigned)I->getClassKind(), - I->getName()); - }; - return ToTuple(A) < ToTuple(B); - }); - - // Actually emit the intrinsic declaration. - for (auto &I : Defs) { - I->emitIntrinsic(OS, *this); - } + createCoreHeaderIntrinsics(OS, ACLEKind::SME); OS << "#ifdef __cplusplus\n"; OS << "} // extern \"C\"\n"; From 2c64efea927730852d6b7b4890130d9f46a78b94 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 23 Oct 2023 10:03:32 +0000 Subject: [PATCH 2/7] - Renamed GetAArch64SMEProcessedOperands to GetAArch64SVEProcessedOperands - Removed code checking for ICEArguments from EmitAArch64SMEBuiltinExpr - Added a comment above the check for IsTupleGetOrSet - Added the code to print SME attributes back into emitIntrinsic --- clang/lib/CodeGen/CodeGenFunction.h | 2 +- clang/utils/TableGen/SveEmitter.cpp | 34 +++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 54cb0a01fd8b69..5224569ab803fb 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4298,7 +4298,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// for struct of scalable vectors if a function returns struct.
llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); - void GetAArch64SMEProcessedOperands(unsigned BuiltinID, const CallExpr *E, + void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<llvm::Value *> &Ops, SVETypeFlags TypeFlags); diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index ff1fac1bc2a379..21fb04d934bc96 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -248,7 +248,7 @@ class Intrinsic { } /// Emits the intrinsic declaration to the ostream. - void emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const; + void emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter, ACLEKind Kind) const; private: std::string getMergeSuffix() const { return MergeSuffix; } @@ -357,7 +357,7 @@ class SVEEmitter { void createHeader(raw_ostream &o); // Emits core intrinsics in both arm_sme.h and arm_sve.h - void createCoreHeaderIntrinsics(raw_ostream &o, ACLEKind Kind); + void createCoreHeaderIntrinsics(raw_ostream &o, SVEEmitter &Emitter, ACLEKind Kind); /// Emit all the __builtin prototypes and code needed by Sema. void createBuiltins(raw_ostream &o); @@ -1028,25 +1028,39 @@ std::string Intrinsic::mangleName(ClassKind LocalCK) const { getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, SVEEmitter &Emitter, + ACLEKind Kind) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); std::string SMEAttrs = ""; + if (Flags & Emitter.getEnumValueForFlag("IsStreaming")) + SMEAttrs += ", arm_streaming"; + if (Flags & Emitter.getEnumValueForFlag("IsStreamingCompatible")) + SMEAttrs += ", arm_streaming_compatible"; + if (Flags & Emitter.getEnumValueForFlag("IsSharedZA")) + SMEAttrs += ", arm_shared_za"; + if (Flags & Emitter.getEnumValueForFlag("IsPreservesZA")) + SMEAttrs += ", arm_preserves_za"; + OS << (IsOverloaded ? "__aio " : "__ai ") << "__attribute__((__clang_arm_builtin_alias("; switch (Kind) { case ACLEKind::SME: - OS << "__builtin_sme_" << FullName << ")))\n"; + OS << "__builtin_sme_" << FullName << ")"; break; case ACLEKind::SVE: - OS << "__builtin_sve_" << FullName << ")))\n"; + OS << "__builtin_sve_" << FullName << ")"; break; } + if (!SMEAttrs.empty()) + OS << SMEAttrs; + OS << "))\n"; + OS << getTypes()[0].str() << " " << ProtoName << "("; for (unsigned I = 0; I < getTypes().size() - 1; ++I) { if (I != 0) @@ -1180,7 +1194,9 @@ void SVEEmitter::createIntrinsic( } } -void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, ACLEKind Kind) { +void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, + SVEEmitter &Emitter, + ACLEKind Kind) { SmallVector<std::unique_ptr<Intrinsic>, 128> Defs; std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); for (auto *R : RV) @@ -1201,7 +1217,7 @@ void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, ACLEKind Kind) { // Actually emit the intrinsic declarations.
for (auto &I : Defs) - I->emitIntrinsic(OS, Kind); + I->emitIntrinsic(OS, Emitter, Kind); } void SVEEmitter::createHeader(raw_ostream &OS) { @@ -1355,7 +1371,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) { << To.Suffix << "(__VA_ARGS__)\n"; } - createCoreHeaderIntrinsics(OS, ACLEKind::SVE); + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SVE); OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n"; OS << "#define svcvtnt_bf16_f32_x svcvtnt_bf16_f32_m\n"; @@ -1537,7 +1553,7 @@ void SVEEmitter::createSMEHeader(raw_ostream &OS) { OS << "extern \"C\" {\n"; OS << "#endif\n\n"; - createCoreHeaderIntrinsics(OS, ACLEKind::SME); + createCoreHeaderIntrinsics(OS, *this, ACLEKind::SME); OS << "#ifdef __cplusplus\n"; OS << "} // extern \"C\"\n"; From 3c770ace7a5314d48c8a68205e90ead252720825 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 26 Oct 2023 15:58:48 +0000 Subject: [PATCH 3/7] - Removed the add of an immediate offset from the tests - Added missing tests to acle_sme2_sub.c --- .../aarch64-sme2-intrinsics/acle_sme2_add.c | 224 ++--- .../aarch64-sme2-intrinsics/acle_sme2_sub.c | 912 ++++++++++++++++-- 2 files changed, 916 insertions(+), 220 deletions(-) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c index 18d7c66896d8b8..81dac173e1a016 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c @@ -24,180 +24,164 @@ // x2 // CHECK-LABEL: @test_svadd_write_single2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s32j11svint32x2_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u32j12svuint32x2_tu12__SVUint32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_s64j11svint64x2_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single2_u64j12svuint64x2_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm); } // x4 // CHECK-LABEL: @test_svadd_write_single4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s32j11svint32x4_tu11__SVInt32_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u32j12svuint32x4_tu12__SVUint32_t( // 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_s64j11svint64x4_tu11__SVInt64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_single4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) // CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z28test_svadd_write_single4_u64j12svuint64x4_tu12__SVUint64_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) { - SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm); } // @@ -208,105 +192,96 @@ void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CHECK-LABEL: @test_svadd_write_multi2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s32j11svint32x2_t11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x2)(slice_base, 
zn, zm); } // CHECK-LABEL: @test_svadd_write_multi2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u32j12svuint32x2_t12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_multi2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s64j11svint64x2_t11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_multi2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u64j12svuint64x2_t12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm); } // x4 // CHECK-LABEL: @test_svadd_write_multi4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) @@ -315,12 +290,11 @@ void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s32j11svint32x4_t11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) @@ -329,16 +303,15 @@ void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_multi4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) @@ -347,12 +320,11 @@ void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) // CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u32j12svuint32x4_t12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) @@ -361,16 +333,15 @@ void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8) // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_multi4_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) @@ -379,12 +350,11 @@ void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s64j11svint64x4_t11svint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) @@ -393,16 +363,15 @@ void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm); } // CHECK-LABEL: @test_svadd_write_multi4_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) @@ -411,12 +380,11 @@ void test_svadd_write_multi4_s64(uint32_t 
slice_base, svint64x4_t zn, svint64x4_ // CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) // CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) // CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u64j12svuint64x4_t12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) @@ -425,11 +393,11 @@ void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { - SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } // @@ -961,266 +929,242 @@ svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) { // CHECK-LABEL: @test_svadd_za32_vg1x2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_f32j13svfloat32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { - 
SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x2)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za32_vg1x2_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_s32j11svint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { - SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x2)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za32_vg1x2_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x2_u32j12svuint32x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { - SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x2)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za64_vg1x2_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_f64j13svfloat64x2_t( // CPP-CHECK-NEXT:
[[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { - SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x2)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za64_vg1x2_s64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_s64j11svint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { - SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x2)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za64_vg1x2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x2_u64j12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { - SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x2)(slice_base, zn); } // x4 // CHECK-LABEL: @test_svadd_za32_vg1x4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 
[[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_f32j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { - SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x4)(slice_base + 6, zn); + SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x4)(slice_base, zn); } // CHECK-LABEL: @test_svadd_za32_vg1x4_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_s32j11svint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) { - SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x4)(slice_base + 6, zn); + 
SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x4)(slice_base, zn);
}

// CHECK-LABEL: @test_svadd_za32_vg1x4_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z25test_svadd_za32_vg1x4_u32j12svuint32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) {
- SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x4)(slice_base + 6, zn);
+ SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x4)(slice_base, zn);
}

// CHECK-LABEL: @test_svadd_za64_vg1x4_f64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_f64j13svfloat64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) {
- SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x4)(slice_base + 6, zn);
+ SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x4)(slice_base, zn);
}

// CHECK-LABEL: @test_svadd_za64_vg1x4_s64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_s64j11svint64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) {
- SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x4)(slice_base + 6, zn);
+ SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x4)(slice_base, zn);
}

// CHECK-LABEL: @test_svadd_za64_vg1x4_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z25test_svadd_za64_vg1x4_u64j12svuint64x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) {
- SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x4)(slice_base + 6, zn);
+ SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x4)(slice_base, zn);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c
index 2cc5c9cebbff03..b499b47944cc5b 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c
@@ -22,94 +22,166 @@
//
// x2

+// CHECK-LABEL: @test_svsub_write_single2_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_s32j11svint32x2_tu11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) {
+ SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm);
+}
+
// CHECK-LABEL: @test_svsub_write_single2_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u32j12svuint32x2_tu12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) {
- SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm);
}
+
+// CHECK-LABEL: @test_svsub_write_single2_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_s64j11svint64x2_tu11__SVInt64_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) {
+ SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm);
}

// CHECK-LABEL: @test_svsub_write_single2_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single2_u64j12svuint64x2_tu12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) {
- SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm);
}

// x4

+// CHECK-LABEL: @test_svsub_write_single4_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_s32j11svint32x4_tu11__SVInt32_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) {
+ SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm);
+}
+
// CHECK-LABEL: @test_svsub_write_single4_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u32j12svuint32x4_tu12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) {
- SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm);
}
+
+// CHECK-LABEL: @test_svsub_write_single4_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_s64j11svint64x4_tu11__SVInt64_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) {
+ SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm);
}

// CHECK-LABEL: @test_svsub_write_single4_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z28test_svsub_write_single4_u64j12svuint64x4_tu12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) {
- SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm);
}

//
@@ -118,59 +190,128 @@ void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64
// x2

+// CHECK-LABEL: @test_svsub_write_multi2_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s32j11svint32x2_t11svint32x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) {
+ SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x2)(slice_base, zn, zm);
+}
+
// CHECK-LABEL: @test_svsub_write_multi2_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u32j12svuint32x2_t12svuint32x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZM]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) {
- SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x2)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm);
+}
+
+// CHECK-LABEL: @test_svsub_write_multi2_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s64j11svint64x2_t11svint64x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) {
+ SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm);
}

// CHECK-LABEL: @test_svsub_write_multi2_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0)
// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u64j12svuint64x2_t12svuint64x2_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZM]], i64 2)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) {
- SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x2)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm);
}

// x4

+// CHECK-LABEL: @test_svsub_write_multi4_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4)
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8)
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s32j11svint32x4_t11svint32x4_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) {
+ SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm);
+}
+
// CHECK-LABEL: @test_svsub_write_multi4_u32(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
@@ -179,12 +320,11 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x
// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4)
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8)
// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u32j12svuint32x4_t12svuint32x4_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0)
// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4)
// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8)
@@ -193,16 +333,45 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x
// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 4)
// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 8)
// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZM]], i64 12)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
// CPP-CHECK-NEXT: ret void
//
void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) {
- SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x4)(slice_base + 7, zn, zm);
+ SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm);
+}
+
+// CHECK-LABEL: @test_svsub_write_multi4_s64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2)
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4)
+// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s64j11svint64x4_t11svint64x4_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6)
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2)
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4)
+// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) {
+ SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm);
}

// CHECK-LABEL: @test_svsub_write_multi4_u64(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7
// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0)
// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2)
// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4)
@@ -211,12 +380,11 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x
// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2)
// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4)
// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]])
// CHECK-NEXT: ret void
// // CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u64j12svuint64x4_t12svuint64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) @@ -225,11 +393,532 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 2) // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 4) // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZM]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { - SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base + 7, zn, zm); + SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); +} + +// +// Multi-Single Vector +// + +// x2 + +// CHECK-LABEL: @test_svsub_vector_single2_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single2_s810svint8x2_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint8x2_t test_svsub_vector_single2_s8(svint8x2_t zn, svint8_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s8_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single2_u811svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint8x2_t test_svsub_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u8_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s1611svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint16x2_t test_svsub_vector_single2_s16(svint16x2_t zn, svint16_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 
0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u1612svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint16x2_t test_svsub_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s3211svint32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint32x2_t test_svsub_vector_single2_s32(svint32x2_t zn, svint32_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u3212svuint32x2_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint32x2_t test_svsub_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s6411svint64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint64x2_t test_svsub_vector_single2_s64(svint64x2_t zn, svint64_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s64_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], 
[[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u6412svuint64x2_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint64x2_t test_svsub_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u64_x2,,,)(zn, zm); +} + + +// x4 + +// CHECK-LABEL: @test_svsub_vector_single4_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single4_s810svint8x4_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// 
CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint8x4_t test_svsub_vector_single4_s8(svint8x4_t zn, svint8_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single4_u811svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint8x4_t test_svsub_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s1611svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint16x4_t test_svsub_vector_single4_s16(svint16x4_t zn, svint16_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: 
[[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u1612svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint16x4_t test_svsub_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s3211svint32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint32x4_t test_svsub_vector_single4_s32(svint32x4_t zn, svint32_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s32_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u3212svuint32x4_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint32x4_t test_svsub_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u32_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svsub_vector_single4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s6411svint64x4_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint64x4_t test_svsub_vector_single4_s64(svint64x4_t zn, svint64_t zm) { + return SVE_ACLE_FUNC(svsub,_single_s64_x4,,,)(zn, zm); +} + +// CHECK-LABEL: 
@test_svsub_vector_single4_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u6412svuint64x4_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint64x4_t test_svsub_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) { + return SVE_ACLE_FUNC(svsub,_single_u64_x4,,,)(zn, zm); } // @@ -240,179 +929,242 @@ void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x // CHECK-LABEL: @test_svsub_za32_vg1x2_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: 
@_Z25test_svsub_za32_vg1x2_f32j13svfloat32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0)
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[ADD]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) {
-  SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x2)(slice_base + 6, zn);
+  SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x2)(slice_base, zn);
+}
+
+// CHECK-LABEL: @test_svsub_za32_vg1x2_s32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_s32j11svint32x2_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) {
+  SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x2)(slice_base, zn);
 }
 
 // CHECK-LABEL: @test_svsub_za32_vg1x2_u32(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x2_u32j12svuint32x2_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[ADD]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) {
-  SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x2)(slice_base + 6, zn);
+  SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x2)(slice_base, zn);
 }
 
 // CHECK-LABEL: @test_svsub_za64_vg1x2_f64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN:%.*]], i64 0)
 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[ZN]], i64 2)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[ADD]],
[[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_f64j13svfloat64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { - SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x2)(slice_base, zn); } +// CHECK-LABEL: @test_svsub_za64_vg1x2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_s64j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { + SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x2)(slice_base, zn); +} // CHECK-LABEL: @test_svsub_za64_vg1x2_u64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x2_u64j12svuint64x2_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[ADD]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { - SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x2)(slice_base + 6, zn); + SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x2)(slice_base, zn); } // x4 // CHECK-LABEL: @test_svsub_za32_vg1x4_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( 
[[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_f32j13svfloat32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { - SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x4)(slice_base + 6, zn); + SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x4)(slice_base, zn); +} + +// CHECK-LABEL: @test_svsub_za32_vg1x4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_s32j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) { + SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x4)(slice_base, zn); } // CHECK-LABEL: @test_svsub_za32_vg1x4_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( 
[[ZN]], i64 12) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za32_vg1x4_u32j12svuint32x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) { - SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x4)(slice_base + 6, zn); + SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x4)(slice_base, zn); } // CHECK-LABEL: @test_svsub_za64_vg1x4_f64( // CHECK-NEXT: entry: -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) // CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) // CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_f64j13svfloat64x4_t( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[ADD]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) { - SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x4)(slice_base + 6, zn); + SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x4)(slice_base, zn); +} + +// CHECK-LABEL: @test_svsub_za64_vg1x4_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) +// 
CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_s64j11svint64x4_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: ret void
+//
+void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) {
+  SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x4)(slice_base, zn);
 }
 
 // CHECK-LABEL: @test_svsub_za64_vg1x4_u64(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[ADD]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
 // CHECK-NEXT: ret void
 //
 // CPP-CHECK-LABEL: @_Z25test_svsub_za64_vg1x4_u64j12svuint64x4_t(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE:%.*]], 6
 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[ADD]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
 // CPP-CHECK-NEXT: ret void
 //
 void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) {
-  SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x4)(slice_base + 6, zn);
+  SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x4)(slice_base, zn);
 }

From 08fefc94e6a611bcc13e123d62d5e4d3de2ac92c Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Thu, 26 Oct 2023 16:47:37 +0000
Subject: [PATCH 4/7] - Removed incorrect tests added for multi-vector sub

---
 clang/lib/CodeGen/CodeGenFunction.h | 4 -
 .../aarch64-sme2-intrinsics/acle_sme2_sub.c | 521 ------------------
 2 files changed, 525 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 5224569ab803fb..42f94c9b540191 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4298,10 +4298,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// for struct of scalable
vectors if a function returns struct. llvm::Value *FormSVEBuiltinResult(llvm::Value *Call); - void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, - SmallVectorImpl &Ops, - SVETypeFlags TypeFlags); - llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags, diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c index b499b47944cc5b..4a3b7b475b97b0 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -400,527 +400,6 @@ void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } -// -// Multi-Single Vector -// - -// x2 - -// CHECK-LABEL: @test_svsub_vector_single2_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single2_s810svint8x2_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint8x2_t test_svsub_vector_single2_s8(svint8x2_t zn, svint8_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s8_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single2_u811svuint8x2_tu11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint8x2_t test_svsub_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u8_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s1611svint16x2_tu11__SVInt16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint16x2_t test_svsub_vector_single2_s16(svint16x2_t zn, svint16_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s16_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u1612svuint16x2_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] 
= tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint16x2_t test_svsub_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u16_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s3211svint32x2_tu11__SVInt32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint32x2_t test_svsub_vector_single2_s32(svint32x2_t zn, svint32_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s32_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u3212svuint32x2_tu12__SVUint32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint32x2_t test_svsub_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u32_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_s64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_s6411svint64x2_tu11__SVInt64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint64x2_t test_svsub_vector_single2_s64(svint64x2_t zn, svint64_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s64_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single2_u64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single2_u6412svuint64x2_tu12__SVUint64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.sub.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) 
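The removed tests above all exercise the same lowering shape, which also appears throughout the add tests that remain: each vector of the input tuple is pulled out with llvm.vector.extract, the aarch64.sve.*.single.x2/x4 intrinsic returns a struct of result vectors, and the results are stitched back into a tuple with llvm.vector.insert. As a point of reference, a minimal usage sketch of the corresponding add builtin that this series does define. The wrapper function is hypothetical, and it assumes an SME2 streaming compilation with <arm_sme.h> available; the exact spelling of the streaming attribute depends on the compiler version.

#include <arm_sme.h>

// Hypothetical wrapper, not from the patch: adds the single vector zm
// to each vector of the tuple zn, i.e. result[i] = zn[i] + zm.
svuint64x2_t add_pair(svuint64x2_t zn, svuint64_t zm) __arm_streaming {
  return svadd_single_u64_x2(zn, zm);
}

No multi-vector sub builtin is defined for plain vectors, which is why the svsub "_single" tests in this hunk are deleted as incorrect.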
-// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint64x2_t test_svsub_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u64_x2,,,)(zn, zm); -} - - -// x4 - -// CHECK-LABEL: @test_svsub_vector_single4_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single4_s810svint8x4_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint8x4_t test_svsub_vector_single4_s8(svint8x4_t zn, svint8_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s8_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z28test_svsub_vector_single4_u811svuint8x4_tu11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint8x4_t test_svsub_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u8_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = 
extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s1611svint16x4_tu11__SVInt16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint16x4_t test_svsub_vector_single4_s16(svint16x4_t zn, svint16_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s16_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u1612svuint16x4_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], 
i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint16x4_t test_svsub_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u16_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s3211svint32x4_tu11__SVInt32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call 
@llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint32x4_t test_svsub_vector_single4_s32(svint32x4_t zn, svint32_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s32_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u3212svuint32x4_tu12__SVUint32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint32x4_t test_svsub_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) { - return SVE_ACLE_FUNC(svsub,_single_u32_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_s64( -// 
CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_s6411svint64x4_tu11__SVInt64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint64x4_t test_svsub_vector_single4_s64(svint64x4_t zn, svint64_t zm) { - return SVE_ACLE_FUNC(svsub,_single_s64_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svsub_vector_single4_u64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.sub.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
-// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
-// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
-//
-// CPP-CHECK-LABEL: @_Z29test_svsub_vector_single4_u6412svuint64x4_tu12__SVUint64_t(
-// CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
-// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sub.single.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
-// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
-// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
-// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
-// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
-// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
-// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
-// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
-//
-svuint64x4_t test_svsub_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) {
-  return SVE_ACLE_FUNC(svsub,_single_u64_x4,,,)(zn, zm);
-}
-
 //
 // Accumulate to ZA
 //

From 4cf42567c9d7ff237fa24dbace4a07492e4fb504 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Thu, 2 Nov 2023 09:57:14 +0000
Subject: [PATCH 5/7] - Run update_cc_test_checks script on add/sub tests

---
 .../aarch64-sme2-intrinsics/acle_sme2_add.c   | 16 ++++++++--------
 .../aarch64-sme2-intrinsics/acle_sme2_sub.c   | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
index 81dac173e1a016..7842688d150751 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c
@@ -199,7 +199,7 @@ void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64
 // CHECK-NEXT:    tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
 // CHECK-NEXT:    ret void
 //
-// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s32j11svint32x2_t11svint32x2_t(
+// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s32j11svint32x2_tS_(
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4)
@@ -221,7 +221,7 @@ void
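// The regenerated CPP-CHECK labels above use Itanium name-mangling
// substitutions: once a type such as svint32x2_t has appeared in a mangled
// name, its next occurrence is compressed to "S_" instead of being spelled
// out again. A minimal sketch of the effect (the function f is hypothetical,
// shown only to illustrate the substitution):
//
//   void f(svint32x2_t zn, svint32x2_t zm);
//   // mangles to _Z1f11svint32x2_tS_, not _Z1f11svint32x2_t11svint32x2_t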
test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u32j12svuint32x2_t12svuint32x2_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u32j12svuint32x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) @@ -243,7 +243,7 @@ void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s64j11svint64x2_t11svint64x2_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_s64j11svint64x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) @@ -265,7 +265,7 @@ void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u64j12svuint64x2_t12svuint64x2_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi2_u64j12svuint64x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) @@ -293,7 +293,7 @@ void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s32j11svint32x4_t11svint32x4_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s32j11svint32x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) @@ -323,7 +323,7 @@ void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u32j12svuint32x4_t12svuint32x4_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u32j12svuint32x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) @@ -353,7 +353,7 @@ void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], 
[[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s64j11svint64x4_t11svint64x4_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_s64j11svint64x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) @@ -383,7 +383,7 @@ void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u64j12svuint64x4_t12svuint64x4_t( +// CPP-CHECK-LABEL: @_Z27test_svadd_write_multi4_u64j12svuint64x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c index 4a3b7b475b97b0..86eee984a2c89b 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -199,7 +199,7 @@ void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s32j11svint32x2_t11svint32x2_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s32j11svint32x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) @@ -221,7 +221,7 @@ void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u32j12svuint32x2_t12svuint32x2_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u32j12svuint32x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) @@ -243,7 +243,7 @@ void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s64j11svint64x2_t11svint64x2_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_s64j11svint64x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) @@ -265,7 +265,7 @@ void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u64j12svuint64x2_t12svuint64x2_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi2_u64j12svuint64x2_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) @@ -293,7 +293,7 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s32j11svint32x4_t11svint32x4_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s32j11svint32x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) @@ -323,7 +323,7 @@ void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u32j12svuint32x4_t12svuint32x4_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u32j12svuint32x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) @@ -353,7 +353,7 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s64j11svint64x4_t11svint64x4_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_s64j11svint64x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) @@ -383,7 +383,7 @@ void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CHECK-NEXT: ret void // -// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u64j12svuint64x4_t12svuint64x4_t( +// CPP-CHECK-LABEL: @_Z27test_svsub_write_multi4_u64j12svuint64x4_tS_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) From 2bf7de45475dad18e4becf7c04fb29d4e4abf691 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 6 Nov 2023 16:40:07 +0000 Subject: [PATCH 6/7] - Add __arm_streaming and __arm_shared_za attributes to the tests --- .../aarch64-sme2-intrinsics/acle_sme2_add.c | 88 +++++++++---------- .../aarch64-sme2-intrinsics/acle_sme2_sub.c | 56 ++++++------ 2 files 
changed, 72 insertions(+), 72 deletions(-) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c index 7842688d150751..83968ae84b3645 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) { +void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -54,7 +54,7 @@ void test_svadd_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) { +void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -72,7 +72,7 @@ void test_svadd_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) { +void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -90,7 +90,7 @@ void test_svadd_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) { +void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -114,7 +114,7 @@ void test_svadd_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) { +void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -136,7 +136,7 @@ void test_svadd_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) { +void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) 
__arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -158,7 +158,7 @@ void test_svadd_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) { +void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -180,7 +180,7 @@ void test_svadd_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) { +void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -208,7 +208,7 @@ void test_svadd_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) { +void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -230,7 +230,7 @@ void test_svadd_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) { +void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -252,7 +252,7 @@ void test_svadd_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) { +void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -274,7 +274,7 @@ void test_svadd_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) { +void test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -306,7 +306,7 @@ void 
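// __arm_streaming and __arm_shared_za are AArch64 ACLE type attributes: the
// first marks a function that executes in streaming SVE mode, the second one
// that shares the ZA array with its caller. Both are needed before the
// compiler accepts the ZA-updating intrinsics exercised by these tests. A
// minimal caller sketch (hypothetical function name; assumes the SME ACLE
// header, spelled arm_sme.h in current releases):
//
//   #include <arm_sme.h>
//
//   void add_pair_to_za(uint32_t slice, svint32x2_t zn, svint32x2_t zm)
//       __arm_streaming __arm_shared_za {
//     // Writes the element-wise sums of zn and zm to two ZA vector groups.
//     svadd_write_za32_s32_vg1x2(slice, zn, zm);
//   }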
test_svadd_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) { +void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -336,7 +336,7 @@ void test_svadd_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) { +void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svadd_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) { +void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -396,7 +396,7 @@ void test_svadd_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { +void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -428,7 +428,7 @@ void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: ret [[TMP6]] // -svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) { +svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); } @@ -454,7 +454,7 @@ svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) // CPP-CHECK-NEXT: ret [[TMP6]] // -svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) { +svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); } @@ -480,7 +480,7 @@ svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: ret [[TMP6]] // -svint16x2_t 
test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) { +svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); } @@ -506,7 +506,7 @@ svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) // CPP-CHECK-NEXT: ret [[TMP6]] // -svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) { +svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); } @@ -532,7 +532,7 @@ svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: ret [[TMP6]] // -svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) { +svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); } @@ -558,7 +558,7 @@ svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) // CPP-CHECK-NEXT: ret [[TMP6]] // -svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) { +svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); } @@ -584,7 +584,7 @@ svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) // CPP-CHECK-NEXT: ret [[TMP6]] // -svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) { +svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); } @@ -610,7 +610,7 @@ svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) { // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) // CPP-CHECK-NEXT: ret [[TMP6]] // -svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) { +svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); } @@ -651,7 +651,7 @@ svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) // CPP-CHECK-NEXT: ret [[TMP12]] // -svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) { +svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); } @@ -689,7 +689,7 @@ svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) // CPP-CHECK-NEXT: ret [[TMP12]] // -svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) { +svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); } @@ -727,7 +727,7 @@ svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) { // 
CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) // CPP-CHECK-NEXT: ret [[TMP12]] // -svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) { +svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); } @@ -765,7 +765,7 @@ svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) // CPP-CHECK-NEXT: ret [[TMP12]] // -svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) { +svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); } @@ -803,7 +803,7 @@ svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) // CPP-CHECK-NEXT: ret [[TMP12]] // -svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) { +svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); } @@ -841,7 +841,7 @@ svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) // CPP-CHECK-NEXT: ret [[TMP12]] // -svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) { +svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); } @@ -879,7 +879,7 @@ svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) // CPP-CHECK-NEXT: ret [[TMP12]] // -svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) { +svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); } @@ -917,7 +917,7 @@ svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) { // CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) // CPP-CHECK-NEXT: ret [[TMP12]] // -svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) { +svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); } @@ -941,7 +941,7 @@ svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { +void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x2)(slice_base, zn); } @@ -959,7 +959,7 @@ void test_svadd_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { +void 
test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x2)(slice_base , zn); } @@ -977,7 +977,7 @@ void test_svadd_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { +void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x2)(slice_base, zn); } @@ -995,7 +995,7 @@ void test_svadd_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { +void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x2)(slice_base, zn); } @@ -1013,7 +1013,7 @@ void test_svadd_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { +void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x2)(slice_base, zn); } @@ -1031,7 +1031,7 @@ void test_svadd_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { +void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x2)(slice_base, zn); } @@ -1055,7 +1055,7 @@ void test_svadd_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { +void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_f32,,_vg1x4)(slice_base, zn); } @@ -1077,7 +1077,7 @@ void test_svadd_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) { +void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_s32,,_vg1x4)(slice_base, zn); } @@ -1099,7 +1099,7 @@ void test_svadd_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) { +void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za32,,_u32,,_vg1x4)(slice_base, 
zn); } @@ -1121,7 +1121,7 @@ void test_svadd_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) { +void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_f64,,_vg1x4)(slice_base, zn); } @@ -1143,7 +1143,7 @@ void test_svadd_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) { +void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_s64,,_vg1x4)(slice_base, zn); } @@ -1165,6 +1165,6 @@ void test_svadd_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) { +void test_svadd_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svadd_za64,,_u64,,_vg1x4)(slice_base, zn); } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c index 86eee984a2c89b..9570deab0b3919 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_sub.c @@ -36,7 +36,7 @@ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) { +void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -54,7 +54,7 @@ void test_svsub_write_single2_s32(uint32_t slice_base, svint32x2_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) { +void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -72,7 +72,7 @@ void test_svsub_write_single2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) { +void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -90,7 +90,7 @@ void test_svsub_write_single2_s64(uint32_t slice_base, svint64x2_t zn, svint64_t // CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.sub.write.single.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) { +void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -114,7 +114,7 @@ void test_svsub_write_single2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) { +void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -136,7 +136,7 @@ void test_svsub_write_single4_s32(uint32_t slice_base, svint32x4_t zn, svint32_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) { +void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -158,7 +158,7 @@ void test_svsub_write_single4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) { +void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -180,7 +180,7 @@ void test_svsub_write_single4_s64(uint32_t slice_base, svint64x4_t zn, svint64_t // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.single.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) { +void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,_single,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -208,7 +208,7 @@ void test_svsub_write_single4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) { +void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x2)(slice_base, zn, zm); } @@ -230,7 +230,7 @@ void test_svsub_write_multi2_s32(uint32_t slice_base, svint32x2_t zn, svint32x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // 
CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) { +void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x2)(slice_base, zn, zm); } @@ -252,7 +252,7 @@ void test_svsub_write_multi2_u32(uint32_t slice_base, svuint32x2_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) { +void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x2)(slice_base, zn, zm); } @@ -274,7 +274,7 @@ void test_svsub_write_multi2_s64(uint32_t slice_base, svint64x2_t zn, svint64x2_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) { +void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x2_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x2)(slice_base, zn, zm); } @@ -306,7 +306,7 @@ void test_svsub_write_multi2_u64(uint32_t slice_base, svuint64x2_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) { +void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za32,_s32,_vg1x4)(slice_base, zn, zm); } @@ -336,7 +336,7 @@ void test_svsub_write_multi4_s32(uint32_t slice_base, svint32x4_t zn, svint32x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) { +void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za32,_u32,_vg1x4)(slice_base, zn, zm); } @@ -366,7 +366,7 @@ void test_svsub_write_multi4_u32(uint32_t slice_base, svuint32x4_t zn, svuint32x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) { +void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za64,_s64,_vg1x4)(slice_base, zn, zm); } @@ -396,7 +396,7 @@ void test_svsub_write_multi4_s64(uint32_t slice_base, svint64x4_t zn, svint64x4_ // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.write.za.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) // CPP-CHECK-NEXT: ret void // -void 
test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) { +void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x4_t zm) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } @@ -420,7 +420,7 @@ void test_svsub_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { +void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x2)(slice_base, zn); } @@ -438,7 +438,7 @@ void test_svsub_za32_vg1x2_f32(uint32_t slice_base, svfloat32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { +void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x2)(slice_base , zn); } @@ -456,7 +456,7 @@ void test_svsub_za32_vg1x2_s32(uint32_t slice_base, svint32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x2.nxv4i32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { +void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x2)(slice_base, zn); } @@ -474,7 +474,7 @@ void test_svsub_za32_vg1x2_u32(uint32_t slice_base, svuint32x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2f64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { +void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x2)(slice_base, zn); } @@ -492,7 +492,7 @@ void test_svsub_za64_vg1x2_f64(uint32_t slice_base, svfloat64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { +void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x2)(slice_base, zn); } @@ -510,7 +510,7 @@ void test_svsub_za64_vg1x2_s64(uint32_t slice_base, svint64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za64.vg1x2.nxv2i64(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { +void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) __arm_streaming __arm_shared_za { SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x2)(slice_base, zn); } @@ -534,7 +534,7 @@ void test_svsub_za64_vg1x2_u64(uint32_t slice_base, svuint64x2_t zn) { // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4f32(i32 [[SLICE_BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) // CPP-CHECK-NEXT: ret void // -void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) { +void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) 
__arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za32,,_f32,,_vg1x4)(slice_base, zn);
 }
 
@@ -556,7 +556,7 @@ void test_svsub_za32_vg1x4_f32(uint32_t slice_base, svfloat32x4_t zn) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) {
+void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za32,,_s32,,_vg1x4)(slice_base, zn);
 }
 
@@ -578,7 +578,7 @@ void test_svsub_za32_vg1x4_s32(uint32_t slice_base, svint32x4_t zn) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za32.vg1x4.nxv4i32(i32 [[SLICE_BASE:%.*]], <vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) {
+void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za32,,_u32,,_vg1x4)(slice_base, zn);
 }
 
@@ -600,7 +600,7 @@ void test_svsub_za32_vg1x4_u32(uint32_t slice_base, svuint32x4_t zn) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2f64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) {
+void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za64,,_f64,,_vg1x4)(slice_base, zn);
 }
 
@@ -622,7 +622,7 @@ void test_svsub_za64_vg1x4_f64(uint32_t slice_base, svfloat64x4_t zn) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) {
+void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za64,,_s64,,_vg1x4)(slice_base, zn);
 }
 
@@ -644,6 +644,6 @@ void test_svsub_za64_vg1x4_s64(uint32_t slice_base, svint64x4_t zn) {
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.sub.za64.vg1x4.nxv2i64(i32 [[SLICE_BASE:%.*]], <vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) {
+void test_svsub_za64_vg1x4_u64(uint32_t slice_base, svuint64x4_t zn) __arm_streaming __arm_shared_za {
   SVE_ACLE_FUNC(svsub_za64,,_u64,,_vg1x4)(slice_base, zn);
 }

From 65b7b668d1662e5490d900ed155d4c19816c3e59 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin
Date: Mon, 6 Nov 2023 16:48:57 +0000
Subject: [PATCH 7/7] - Moved the multi-single vector adds to a separate test file
- Ran clang-format to fix formatting issues in SveEmitter.cpp
- Added a comment to arm_sve.td to explain why some SME2 intrinsics are included

---
 clang/include/clang/Basic/arm_sve.td          |   3 +
 .../aarch64-sme2-intrinsics/acle_sme2_add.c   | 521 -----------------
 .../acle_sme2_vector_add.c                    | 539 ++++++++++++++++++
 clang/utils/TableGen/SveEmitter.cpp           |  21 +-
 4 files changed, 554 insertions(+), 530 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 53d5d32807ce39..3d4c2129565903 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1984,6 +1984,9 @@ defm SVREVD : SInstZPZ<"svrevd",
"csilUcUsUiUl", "aarch64_sve_revd">; //////////////////////////////////////////////////////////////////////////////// // SME2 +// SME intrinsics which operate only on vectors and do not require ZA should be added here, +// as they could possibly become SVE instructions in the future. + let TargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c index 83968ae84b3645..dd96dca70d6370 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c @@ -400,527 +400,6 @@ void test_svadd_write_multi4_u64(uint32_t slice_base, svuint64x4_t zn, svuint64x SVE_ACLE_FUNC(svadd_write,,_za64,_u64,_vg1x4)(slice_base, zn, zm); } -// -// Multi-Single Vector -// - -// x2 - -// CHECK-LABEL: @test_svadd_vector_single2_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( -// 
CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_s64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single2_u64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CHECK-NEXT: ret [[TMP6]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } 
@llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) -// CPP-CHECK-NEXT: ret [[TMP6]] -// -svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); -} - - -// x4 - -// CHECK-LABEL: @test_svadd_vector_single4_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint8x4_t test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); -} - -// CHECK-LABEL: 
@test_svadd_vector_single4_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// 
CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CHECK-NEXT: ret 
[[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], 
[[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// 
CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_s64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm); -} - -// CHECK-LABEL: @test_svadd_vector_single4_u64( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// 
CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CHECK-NEXT: ret [[TMP12]] -// -// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t( -// CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN:%.*]], i64 0) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 2) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[ZN]], i64 6) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) -// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 2) -// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP8]], [[TMP9]], i64 4) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP10]], [[TMP11]], i64 6) -// CPP-CHECK-NEXT: ret [[TMP12]] -// -svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming { - return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm); -} - // // Accumulate to ZA // diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c new file mode 100644 index 00000000000000..85c4b9b09546b3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_vector_add.c @@ -0,0 +1,539 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: 
%clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
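+
+// For example, SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm) expands to
+// svadd_single_s8_x2(zn, zm) by default, and to the overloaded form
+// svadd(zn, zm) when SVE_OVERLOADED_FORMS is defined.
+
+//
+// Multi-Single Vector
+//
+// Each svadd[_single_{d}_xN] form below adds the single vector zm to each of
+// the N vectors of the tuple zn.
+
+// x2
+
+// CHECK-LABEL: @test_svadd_vector_single2_s8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CHECK-NEXT: ret [[TMP6]]
+//
+// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_s810svint8x2_tu10__SVInt8_t(
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
+// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
+// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CPP-CHECK-NEXT: ret [[TMP6]]
+//
+svint8x2_t test_svadd_vector_single2_s8(svint8x2_t zn, svint8_t zm) __arm_streaming {
+ return SVE_ACLE_FUNC(svadd,_single_s8_x2,,,)(zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_vector_single2_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]])
+// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0
+// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0)
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1
+// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16)
+// CHECK-NEXT: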
ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single2_u811svuint8x2_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv16i8( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint8x2_t test_svadd_vector_single2_u8(svuint8x2_t zn, svuint8_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u8_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s1611svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint16x2_t test_svadd_vector_single2_s16(svint16x2_t zn, svint16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_s16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: 
@_Z29test_svadd_vector_single2_u1612svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv8i16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint16x2_t test_svadd_vector_single2_u16(svuint16x2_t zn, svuint16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u16_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s3211svint32x2_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint32x2_t test_svadd_vector_single2_s32(svint32x2_t zn, svint32_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_s32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u3212svuint32x2_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv4i32( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint32x2_t test_svadd_vector_single2_u32(svuint32x2_t zn, svuint32_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u32_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_s6411svint64x2_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svint64x2_t test_svadd_vector_single2_s64(svint64x2_t zn, svint64_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_s64_x2,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single2_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CHECK-NEXT: ret [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single2_u6412svuint64x2_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.add.single.x2.nxv2i64( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP4]], [[TMP5]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP6]] +// +svuint64x2_t test_svadd_vector_single2_u64(svuint64x2_t zn, svuint64_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u64_x2,,,)(zn, zm); +} + + +// x4 + +// CHECK-LABEL: @test_svadd_vector_single4_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_s810svint8x4_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint8x4_t 
test_svadd_vector_single4_s8(svint8x4_t zn, svint8_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_s8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z28test_svadd_vector_single4_u811svuint8x4_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[ZN]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 16) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP8]], [[TMP9]], i64 32) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP10]], [[TMP11]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint8x4_t test_svadd_vector_single4_u8(svuint8x4_t zn, svuint8_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u8_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s1611svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svint16x4_t test_svadd_vector_single4_s16(svint16x4_t zn, svint16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_s16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = 
extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u1612svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP8]], [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP10]], [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP12]] +// +svuint16x4_t test_svadd_vector_single4_u16(svuint16x4_t zn, svuint16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svadd,_single_u16_x4,,,)(zn, zm); +} + +// CHECK-LABEL: @test_svadd_vector_single4_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { , , , } @llvm.aarch64.sve.add.single.x4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 4) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP8]], [[TMP9]], i64 8) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP10]], [[TMP11]], i64 12) +// CHECK-NEXT: ret [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s3211svint32x4_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+//
+svint32x4_t test_svadd_vector_single4_s32(svint32x4_t zn, svint32_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svadd,_single_s32_x4,,,)(zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_vector_single4_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+//
+// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u3212svuint32x4_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP10]], <vscale x 4 x i32> [[TMP11]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP12]]
+//
+svuint32x4_t test_svadd_vector_single4_u32(svuint32x4_t zn, svuint32_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svadd,_single_u32_x4,,,)(zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_vector_single4_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+//
+// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_s6411svint64x4_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+//
+svint64x4_t test_svadd_vector_single4_s64(svint64x4_t zn, svint64_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svadd,_single_s64_x4,,,)(zn, zm);
+}
+
+// CHECK-LABEL: @test_svadd_vector_single4_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+//
+// CPP-CHECK-LABEL: @_Z29test_svadd_vector_single4_u6412svuint64x4_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[ZN]], i64 6)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP4]], 3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP10]], <vscale x 2 x i64> [[TMP11]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP12]]
+//
+svuint64x4_t test_svadd_vector_single4_u64(svuint64x4_t zn, svuint64_t zm) __arm_streaming {
+  return SVE_ACLE_FUNC(svadd,_single_u64_x4,,,)(zn, zm);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 21fb04d934bc96..96253c70932773 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -357,7 +357,8 @@ class SVEEmitter {
   void createHeader(raw_ostream &o);
 
   // Emits core intrinsics in both arm_sme.h and arm_sve.h
-  void createCoreHeaderIntrinsics(raw_ostream &o, SVEEmitter &Emitter, ACLEKind Kind);
+  void createCoreHeaderIntrinsics(raw_ostream &o, SVEEmitter &Emitter,
+                                  ACLEKind Kind);
 
   /// Emit all the __builtin prototypes and code needed by Sema.
   void createBuiltins(raw_ostream &o);
@@ -1206,14 +1207,16 @@ void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS,
   // - Architectural guard (i.e. does it require SVE2 or SVE2_AES)
   // - Class (is intrinsic overloaded or not)
   // - Intrinsic name
-  std::stable_sort(
-      Defs.begin(), Defs.end(), [](const std::unique_ptr<Intrinsic> &A,
-                                   const std::unique_ptr<Intrinsic> &B) {
-        auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) {
-          return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName());
-        };
-        return ToTuple(A) < ToTuple(B);
-      });
+  std::stable_sort(Defs.begin(), Defs.end(),
+                   [](const std::unique_ptr<Intrinsic> &A,
+                      const std::unique_ptr<Intrinsic> &B) {
+                     auto ToTuple = [](const std::unique_ptr<Intrinsic> &I) {
+                       return std::make_tuple(I->getGuard(),
+                                              (unsigned)I->getClassKind(),
+                                              I->getName());
+                     };
+                     return ToTuple(A) < ToTuple(B);
+                   });
 
   // Actually emit the intrinsic declarations.
   for (auto &I : Defs)