Skip to content

Commit

Permalink
[Clang][SME2] Add multi-vector add/sub builtins (#69725)
Browse files Browse the repository at this point in the history
Adds the following SME2 builtins:
 - sv(add|sub)
 - sv(add|sub)_za32/za64,
 - sv(add|sub)_write_za32/za64

Other changes in this patch:
 - CGBuiltin.cpp: The GetAArch64SMEProcessedOperands function is created
    to avoid duplicating existing code from EmitAArch64SVEBuiltinExpr.
 - arm_sve.td: The add/sub SME2 builtins which do not operate on ZA have
been added to arm_sve.td, matching the corrosponding LLVM IR intrinsic
    names which start with @llvm.aarch64.sve for this reason.
- SveEmitter.cpp: Adds the createCoreHeaderIntrinsics function to remove
    duplicated code in createHeader & createSMEHeader. Uses a new enum
(ACLEKind) to choose either "__builtin_sme_" or "__builtin_sve_" when
    emitting the intrinsics.

See https://github.com/ARM-software/acle/pull/217/files
  • Loading branch information
kmclaughlin-arm authored Nov 7, 2023
1 parent d180cfb commit 48fb8ee
Show file tree
Hide file tree
Showing 8 changed files with 1,936 additions and 51 deletions.
35 changes: 35 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,38 @@ multiclass ZAFPOuterProd<string n_suffix> {

defm SVMOPA : ZAFPOuterProd<"mopa">;
defm SVMOPS : ZAFPOuterProd<"mops">;

////////////////////////////////////////////////////////////////////////////////
// SME2 - ADD, SUB

multiclass ZAAddSub<string n_suffix> {
let TargetGuard = "sme2" in {
def NAME # _WRITE_SINGLE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x2", "vm2d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _WRITE_SINGLE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x4", "vm4d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;

def NAME # _WRITE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x2", "vm22", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _WRITE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x4", "vm44", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;

def NAME # _ZA32_VG1x2_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x2", "vm2", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x4", "vm4", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x4", [IsStreaming, IsSharedZA], []>;

let TargetGuard = "sme-i16i64" in {
def NAME # _WRITE_SINGLE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x2", "vm2d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _WRITE_SINGLE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;

def NAME # _WRITE_ZA64_VG1x2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _WRITE_ZA64_VG1x4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;

def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
}

let TargetGuard = "sme-f64f64" in {
def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
}
}
}

defm SVADD : ZAAddSub<"add">;
defm SVSUB : ZAAddSub<"sub">;
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -1980,3 +1980,15 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv

defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
}

////////////////////////////////////////////////////////////////////////////////
// SME2

// SME intrinsics which operate only on vectors and do not require ZA should be added here,
// as they could possibly become SVE instructions in the future.

let TargetGuard = "sme2" in {
// == ADD (vectors) ==
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
}
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/arm_sve_sme_incl.td
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
}

class Inst<string n, string p, string t, MergeType mt, string i,
list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
string Name = n;
string Prototype = p;
string Types = t;
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4297,6 +4297,7 @@ class CodeGenFunction : public CodeGenTypeCache {
/// the wider vector. This avoids the error when allocating space in llvm
/// for struct of scalable vectors if a function returns struct.
llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);

llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,
Expand Down
Loading

0 comments on commit 48fb8ee

Please sign in to comment.