[ARM] Handle roundeven for MVE. (llvm#142557)

davemgreen · tomtor · commit 9a045817c460 · 2025-06-14T19:47:40.000+02:00
Now that llvm#141786 handles scalar and NEON types, this adds MVE definitions and legalization for the llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven, like other vrint instructions, so they should continue to work.
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
@@ -710,7 +710,7 @@ defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">;
 defm vrndpq: vrnd<IRIntBase<"ceil",  [Vector]>, "p">;
 defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">;
 defm vrndxq: vrnd<IRIntBase<"rint",  [Vector]>, "x">;
-defm vrndnq: vrnd<IRInt<"vrintn",    [Vector]>, "n">;
+defm vrndnq: vrnd<IRIntBase<"roundeven", [Vector]>, "n">;
 
 multiclass compare_with_pred<string condname, dag arguments,
                              dag cmp, string suffix> {
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -148,7 +148,7 @@ float32x4_t test_vrndxq_f32(float32x4_t a)
 
 // CHECK-LABEL: @test_vrndnq_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]])
 // CHECK-NEXT:    ret <8 x half> [[TMP0]]
 //
 float16x8_t test_vrndnq_f16(float16x8_t a)
@@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a)
 
 // CHECK-LABEL: @test_vrndnq_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]])
 // CHECK-NEXT:    ret <4 x float> [[TMP0]]
 //
 float32x4_t test_vrndnq_f32(float32x4_t a)
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in {
     [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;
 }
 
-def int_arm_mve_vrintn: DefaultAttrsIntrinsic<
-  [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_arm_mve_vcls: DefaultAttrsIntrinsic<
   [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
@@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
         return false; // Not 'arm.mve.vctp64'.
       }
 
+      if (Name.starts_with("vrintn.v")) {
+        NewFn = Intrinsic::getOrInsertDeclaration(
+            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
+        return true;
+      }
+
       // These too are changed to accept a v2i1 instead of the old v4i1.
       if (Name.consume_back(".v4i1")) {
         // 'arm.mve.*.v4i1'.
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
 }
 
 multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
-  defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+  defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
   defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
   defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
   defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -424,21 +424,74 @@ entry:
   ret <2 x double> %0
 }
 
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-declare <4 x float> @llvm.rint.v4f32(<4 x float>)
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
-declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
-declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
-declare <8 x half> @llvm.rint.v8f16(<8 x half>)
-declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
-declare <8 x half> @llvm.floor.v8f16(<8 x half>)
-declare <8 x half> @llvm.round.v8f16(<8 x half>)
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-declare <2 x double> @llvm.round.v2f64(<2 x double>)
+define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: froundeven_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrintn.f32 s3, s3
+; CHECK-MVE-NEXT:    vrintn.f32 s2, s2
+; CHECK-MVE-NEXT:    vrintn.f32 s1, s1
+; CHECK-MVE-NEXT:    vrintn.f32 s0, s0
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintn.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: froundeven_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
+; CHECK-MVE-NEXT:    vrintn.f16 s0, s0
+; CHECK-MVE-NEXT:    vrintn.f16 s4, s4
+; CHECK-MVE-NEXT:    vins.f16 s0, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
+; CHECK-MVE-NEXT:    vrintn.f16 s4, s4
+; CHECK-MVE-NEXT:    vrintn.f16 s1, s1
+; CHECK-MVE-NEXT:    vins.f16 s1, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
+; CHECK-MVE-NEXT:    vrintn.f16 s4, s4
+; CHECK-MVE-NEXT:    vrintn.f16 s2, s2
+; CHECK-MVE-NEXT:    vins.f16 s2, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
+; CHECK-MVE-NEXT:    vrintn.f16 s4, s4
+; CHECK-MVE-NEXT:    vrintn.f16 s3, s3
+; CHECK-MVE-NEXT:    vins.f16 s3, s4
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: froundeven_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintn.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) {
+; CHECK-LABEL: froundeven_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl roundeven
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl roundeven
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
 
+; The llvm.arm.mve.vrintn intrinsics should auto-upgrade to llvm.roundeven, which is selected to vrintn.
+
 define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
 ; CHECK-LABEL: test_vrndnq_f16:
 ; CHECK:       @ %bb.0: @ %entry

Original file line number	Diff line number	Diff line change
`@@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in {`
`1306`	`1306`	`[llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>;`
`1307`	`1307`	`}`
`1308`	`1308`
`1309`		`-def int_arm_mve_vrintn: DefaultAttrsIntrinsic<`
`1310`		`- [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;`
`1311`	`1309`	`def int_arm_mve_vcls: DefaultAttrsIntrinsic<`
`1312`	`1310`	`[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;`
`1313`	`1311`
Original file line number	Diff line number	Diff line change
`@@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,`
`3527`	`3527`	`}`
`3528`	`3528`
`3529`	`3529`	`multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {`
`3530`		`- defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;`
	`3530`	`+ defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;`
`3531`	`3531`	`defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;`
`3532`	`3532`	`defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;`
`3533`	`3533`	`defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;`