-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Set predicates for FP/SIMD InstAliases #79033
Conversation
These are aliases for instructions which are only available when the fp-armv8 or neon features are enabled, so their predicates should be set appropriately.
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: None (ostannard) Changes: These are aliases for instructions which are only available when the fp-armv8 or neon features are enabled, so their predicates should be set appropriately. Full diff: https://github.com/llvm/llvm-project/pull/79033.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c63f23bda6805a5..cdd9ad5f7e442e8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3520,6 +3520,7 @@ def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
@@ -3530,6 +3531,7 @@ def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
+}
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
@@ -4163,6 +4165,7 @@ def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
@@ -4173,6 +4176,7 @@ def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
+}
def : InstAlias<"strb $Rt, [$Rn, $offset]",
(STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
@@ -4583,8 +4587,10 @@ def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
Requires<[HasFullFP16]>;
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
+}
def : Pat<(bf16 fpimm0),
(FMOVH0)>;
@@ -5026,10 +5032,12 @@ defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
+let Predicates = [HasNEON] in {
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
(NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
(NOTv16i8 V128:$Vd, V128:$Vn)>;
+}
def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
@@ -5292,6 +5300,7 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+let Predicates = [HasNEON] in {
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
@@ -5469,6 +5478,7 @@ def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
"|faclt.2d\t$dst, $src1, $src2}",
(FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
@@ -5531,6 +5541,7 @@ defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
int_aarch64_neon_fmulx,
[HasNEONorSME]>;
+let Predicates = [HasNEON] in {
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
@@ -5539,6 +5550,8 @@ def : InstAlias<"cmlo $dst, $src1, $src2",
(CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
(CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+}
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"fcmle $dst, $src1, $src2",
(FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
@@ -5555,6 +5568,7 @@ def : InstAlias<"faclt $dst, $src1, $src2",
(FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
(FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+}
//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
@@ -7001,6 +7015,7 @@ defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
+let Predicates = [HasNEON] in {
def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
@@ -7020,6 +7035,7 @@ def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+}
// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
@@ -7103,6 +7119,7 @@ let Predicates = [HasNEON] in {
ssub)>;
}
+let Predicates = [HasNEON] in {
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
@@ -7112,6 +7129,7 @@ def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+}
def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
(MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
@@ -7147,6 +7165,7 @@ def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
+let Predicates = [HasNEON] in {
def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
@@ -7156,6 +7175,7 @@ def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+}
def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
(MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
@@ -7643,6 +7663,7 @@ def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(SSHLLv4i32_shift V128:$Rn, (i32 0))>;
+let Predicates = [HasNEON] in {
// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
(SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
@@ -7698,6 +7719,7 @@ def : InstAlias<"uxtl2.2d $dst, $src1",
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+}
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
@@ -8146,7 +8168,7 @@ def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
-let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
+let hasSideEffects = 0, mayStore = 0, mayLoad = 0, Predicates = [HasAES] in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
diff --git a/llvm/test/MC/AArch64/no-fp-errors.s b/llvm/test/MC/AArch64/no-fp-errors.s
index 1595ba4798b082d..9fe3cba83e25513 100644
--- a/llvm/test/MC/AArch64/no-fp-errors.s
+++ b/llvm/test/MC/AArch64/no-fp-errors.s
@@ -191,3 +191,173 @@ label:
// CHECK: [[@LINE-1]]:7: error: expected writable system register or pstate
msr FPSR, x0
// CHECK: [[@LINE-1]]:7: error: expected writable system register or pstate
+
+ ldr s0, [x0, #1]
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ str q0, [x0, #1]
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+
+ fmov s0, #0.0
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ fmov d0, #0.0
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+
+ mvn v0.8b, v1.8b
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvn v0.16b, v1.16b
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ mov v0.16b, v1.16b
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.8h, v1.8h
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.4s, v1.4s
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.2d, v1.2d
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ mov v0.8b, v1.8b
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.4h, v1.4h
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.2s, v1.2s
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mov v0.1d, v1.1d
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ faclt v0.4h, v1.4h, v2.4h
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fullfp16 neon
+ faclt v0.8h, v1.8h, v2.8h
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fullfp16 neon
+ faclt v0.2s, v1.2s, v2.2s
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ faclt v0.4s, v1.4s, v2.4s
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ faclt v0.2d, v1.2d, v2.2d
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ cmls d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ cmle d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ cmlo d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ cmlt d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ fcmle s0, s1, s2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ fcmle d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ fcmlt s0, s1, s2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ fcmlt d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ facle s0, s1, s2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ facle d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ faclt s0, s1, s2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+ faclt d0, d1, d2
+// CHECK: [[@LINE-1]]:3: error: instruction requires: fp-armv8
+
+ bic v0.4h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic v0.8h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic v0.2s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic v0.4s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ bic.4h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic.8h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic.2s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ bic.4s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ orr v0.4h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr v0.8h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr v0.2s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr v0.4s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ orr.4h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr.8h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr.2s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ orr.4s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ movi v0.4h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi v0.8h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi v0.2s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi v0.4s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ movi.4h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi.8h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi.2s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ movi.4s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ mvni v0.4h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni v0.8h, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni v0.2s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni v0.4s, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ mvni.4h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni.8h v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni.2s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ mvni.4s v0, #42
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ sxtl.8h v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ sxtl.4s v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ sxtl.2d v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ sxtl2.8h v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ sxtl2.4s v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ sxtl2.2d v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ uxtl.8h v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ uxtl.4s v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ uxtl.2d v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+
+ uxtl2.8h v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ uxtl2.4s v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
+ uxtl2.2d v0, v1
+// CHECK: [[@LINE-1]]:3: error: instruction requires: neon
|
Script used to find aliases with missing predicates: https://gist.github.com/ostannard/a919cf7c0da5a6971c1e345be1928ef0 |
What do you think about the -mgeneral-regs-only issue? It worries me a bit to be honest, but this might not be making things a whole lot worse than they already are. |
I think that we don't currently support As for how we could implement |
Yeah it's about whether it makes using a feature that people are already using even worse. If they are already relying on it then there is a limit to how much we can just say "it is not supported". I don't think these aliases should be too much of a problem though, and we have the workaround you mentioned in #77817 (comment).
This might be useful for the isNeonAvailable predicates too, which were apparently to allow neon inline assembly in streaming sve functions. |
These are aliases for instructions which are only available when the fp-armv8 or neon features are enabled, so their predicates should be set appropriately.