-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64] Prefer SVE2.2 zeroing forms of certain instructions with an all-true predicate #120595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Prefer SVE2.2 zeroing forms of certain instructions with an all-true predicate #120595
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) ChangesPatch is 36.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120595.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ff3ca8a24fc04a..6aa8cd4f0232ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -228,6 +228,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
+ bool SelectAny(SDValue) { return true; }
+
bool SelectDupZero(SDValue N) {
switch(N->getOpcode()) {
case AArch64ISD::DUP:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d015cc15581ad0..b1c61c8f84ef45 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -381,9 +381,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseUnaryUndefPseudos
- : Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">;
-
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4a4412f9df6a1a..1e297eb5feb266 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -675,14 +675,6 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
- let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
- defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fabs_mt>;
- defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fneg_mt>;
-
- defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
- }
-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
// No dedicated instruction, so just clear the sign bit.
def : Pat<(VT (fabs VT:$op)),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f8a633872cb966..70e72ddcc1041a 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -484,6 +484,7 @@ let Predicates = [HasSVEorSME] in {
//===----------------------------------------------------------------------===//
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
+def SVEAny : ComplexPattern<vAny, 0, "SelectAny", []>;
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
Instruction inst>
@@ -504,10 +505,15 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
(inst $Op3, $Op1, $Op2)>;
}
-class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
- : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
- (inst $Op1, $Op2)>;
+multiclass SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst> {
+ let AddedComplexity = 10 in {
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (vtd (SVEAny)))),
+ (inst $Op1, $Op2)>;
+ }
+}
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
@@ -576,10 +582,15 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
(inst $Op1, $Op2, $Op3)>;
}
-class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
- ValueType vt2, ValueType vt3, Instruction inst>
- : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
- (inst $Op1, $Op2)>;
+multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst> {
+ let AddedComplexity = 10 in {
+ def : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (vt1 (SVEAny)), (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+ (inst $Op2, $Op3)>;
+ }
+}
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4,
@@ -2862,8 +2873,8 @@ multiclass sve_fp_fcvtltz<string asm, string op> {
def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -3273,7 +3284,7 @@ class sve_fp_z2op_p_zd<bits<7> opc,string asm, RegisterOperand i_zprtype,
multiclass sve_fp_z2op_p_zd<string asm, SDPatternOperator op> {
def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve_fp_z2op_p_zd_hsd<bits<5> opc, string asm> {
@@ -3290,7 +3301,7 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
multiclass sve_fp_z2op_p_zd_bfcvt<string asm, SDPatternOperator op> {
def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>;
- def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperator ir_op> {
@@ -3302,14 +3313,14 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
@@ -3336,12 +3347,12 @@ multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -4859,6 +4870,16 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4867,10 +4888,10 @@ multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op>
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4984,6 +5005,17 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4991,12 +5023,12 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternO
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
index 1caee994220f05..55485290a392ad 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
@@ -664,3 +664,535 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svfabs_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv2f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svfabs_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv2f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv2f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svfabs_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+...
[truncated]
|
✅ With the latest revision this PR passed the undef deprecator. |
… all-true predicate When the predicate of a destructive operation is known to be all-true, for example fabs z0.s, p0/m, z1.s then the entire output register is written and we can use a zeroing (instead of a merging) form of the instruction, for example fabs z0.s, p0/z, z1.s thus eliminate the dependency on the input-output destination register without the need to insert a `movprfx`. This patch complements (and in the case of 2b3266c, fixes a regression) the following: 7f4414b [AArch64] Generate zeroing forms of certain SVE2.2 instructions (4/11) (llvm#116830) 2474cf7 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11) (llvm#116829) 6f285d3 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11) (llvm#116828) 2b3266c [AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (llvm#116259)
c714e16
to
482b828
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do prefer this, though SVEAny
is a bit unfortunate!
I was pretty sure this would conflict with #118788, but apparently not.
Cheers!
Right, neither of these patches touches |
Yeah - maybe this is worth some investigation. (Just a note after reading my comment again): For narrowing top conversions in particular, this might be because the passthrough operand is always live regardless of the predicate. (Edited after I realised I was not quite right, my mind was reminded of #110278). |
When the predicate of a destructive operation is known to be all-true,
for example
then the entire output register is written and we can use a zeroing
(instead of a merging) form of the instruction, for example
thus eliminate the dependency on the input-output destination register
without the need to insert a
movprfx
.This patch complements (and in the case of 2b3266c, fixes a regression)
the following:
7f4414b [AArch64] Generate zeroing forms of certain SVE2.2 instructions (4/11) (#116830)
2474cf7 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11) (#116829)
6f285d3 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11) (#116828)
2b3266c [AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (#116259)