Skip to content

release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) #128001

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2025

Conversation

llvmbot
Copy link
Member

@llvmbot llvmbot commented Feb 20, 2025

Backport 70ed381

Requested by: @davemgreen

@llvmbot llvmbot added this to the LLVM 20.X Release milestone Feb 20, 2025
@llvmbot
Copy link
Member Author

llvmbot commented Feb 20, 2025

@nikic What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Feb 20, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: None (llvmbot)

Changes

Backport 70ed381

Requested by: @davemgreen


Patch is 34.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128001.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll (+6-6)
  • (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+72-72)
  • (modified) llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll (+6-6)
  • (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+72-72)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f32..536c193d52080 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7801,13 +7801,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
     // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
     auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
                                      SrcTy.changeElementSize(1), Src, MaxC);
     auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
     // Clamp by MaxFloat from above. NaN cannot occur.
     auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
     auto MinP =
-        MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
+        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
                              MinC, MachineInstr::FmNoNans);
     auto Min =
         MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..39e2db3a52d2c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x9, lt
+; CHECK-GI-NEXT:    csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
 ; CHECK-GI-NEXT:    mov x8, #281474976448512 // =0xfffffffc0000
 ; CHECK-GI-NEXT:    movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:    csel x8, x20, x8, gt
+; CHECK-GI-NEXT:    csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..67d625dd16473 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x9, lt
+; CHECK-GI-NEXT:    csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
 ; CHECK-GI-NEXT:    mov x8, #281474976448512 // =0xfffffffc0000
 ; CHECK-GI-NEXT:    movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:    csel x8, x20, x8, gt
+; CHECK-GI-NEXT:    csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -656,26 +656,26 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov v1.16b, v2.16b
 ; CHECK-GI-NEXT:    str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x20, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x21, x8, x20, lt
+; CHECK-GI-NEXT:    csel x21, x8, x20, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x21
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    movk x22, #16413, lsl #48
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x21, x22, gt
+; CHECK-GI-NEXT:    csel x8, x21, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -686,21 +686,21 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w21, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x20, lt
+; CHECK-GI-NEXT:    csel x20, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x20, x22, gt
+; CHECK-GI-NEXT:    csel x8, x20, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -827,26 +827,26 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    str q2, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x20, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x21, x8, x20, lt
+; CHECK-GI-NEXT:    csel x21, x8, x20, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x21
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-GI-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    movk x22, #16413, lsl #48
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x21, x22, gt
+; CHECK-GI-NEXT:    csel x8, x21, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -856,21 +856,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w21, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x23, x8, x20, lt
+; CHECK-GI-NEXT:    csel x23, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x23
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x23, x22, gt
+; CHECK-GI-NEXT:    csel x8, x23, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -881,21 +881,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w23, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x20, lt
+; CHECK-GI-NEXT:    csel x20, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x20, x22, gt
+; CHECK-GI-NEXT:    csel x8, x20, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
@@ -1043,26 +1043,26 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
 ; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    stp q1, q3, [sp, #64] // 32-byte Folded Spill
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x20, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x21, x8, x20, lt
+; CHECK-GI-NEXT:    csel x21, x8, x20, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x21
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
 ; CHECK-GI-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    movk x22, #16413, lsl #48
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x21, x22, gt
+; CHECK-GI-NEXT:    csel x8, x21, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1073,21 +1073,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w21, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x23, x8, x20, lt
+; CHECK-GI-NEXT:    csel x23, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x23
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x23, x22, gt
+; CHECK-GI-NEXT:    csel x8, x23, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -1098,20 +1098,20 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w23, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x24, x8, x20, lt
+; CHECK-GI-NEXT:    csel x24, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x24
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x24, x22, gt
+; CHECK-GI-NEXT:    csel x8, x24, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -1121,21 +1121,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp, #64] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel w24, wzr, w19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x20, lt
+; CHECK-GI-NEXT:    csel x20, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x20, x22, gt
+; CHECK-GI-NEXT:    csel x8, x20, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfsi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
@@ -5633,26 +5633,26 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov v1.16b, v2.16b
 ; CHECK-GI-NEXT:    str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x20, #-4594234569871327232 // =0xc03e000000000000
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x21, x8, x20, lt
+; CHECK-GI-NEXT:    csel x21, x8, x20, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI86_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x21
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI86_0]
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    mov x22, #-1125899906842624 // =0xfffc000000000000
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    mov x23, #4629137466983448575 // =0x403dffffffffffff
-; CHECK-GI-NEXT:    csel x8, x19, x22, gt
+; CHECK-GI-NEXT:    csel x8, x19, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x21, x23, gt
+; CHECK-GI-NEXT:    csel x8, x21, x23, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfdi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -5663,21 +5663,21 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    csel x21, xzr, x19, ne
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, x20, lt
+; CHECK-GI-NEXT:    csel x20, x8, x20, gt
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    csel x8, x19, x22, gt
+; CHECK-GI-NEXT:    csel x8, x19, x22, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
-; CHECK-GI-NEXT:    csel x8, x20, x23, gt
+; CHECK-GI-NEXT:    csel x8, x20, x23, lt
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    bl __fixtfdi
 ; CHECK-GI-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 0dea7be5052d0..46950e7a60349 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -797,25 +797,25 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:    bl __getf2
+; CHECK-GI-NEXT:    bl __gttf2
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
 ; CHECK-GI-NEXT:    mov v0.d[0], x19
-; CHECK-GI-NEXT:    csel x20, x8, xzr, lt
+; CHECK-GI-NEXT:    csel x20, x8, xzr, gt
 ; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:    mov v0.d[1], x20
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:    bl __gttf2
+; CHECK-GI-NEXT:    bl __lttf2
 ; CHECK-GI-NEXT:    cmp w0, #0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:    csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
 ; CHECK-GI-NEXT:    mov x8, #281474976579584 // =0xfffffffe0000
 ; CHECK-GI-NEXT:    movk x8, #16414, lsl #48
-; CHECK-GI-NEXT:    csel x8, x20, x8, gt
+; C...
[truncated]

The SDAG version uses fminnum/fmaxnum, in converting it to fcmp+select
it appears the order of the operands was chosen badly. This switches the
conditions used to keep the constant on the RHS.

(cherry picked from commit 70ed381)
@tstellar tstellar merged commit 6dcece4 into llvm:release/20.x Feb 20, 2025
7 of 9 checks passed
Copy link

@davemgreen (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

4 participants