Skip to content

Commit 4942978

Browse files
committed
[RISCV] Add lowering for scalar fmaximum/fminimum.
Unlike fmaxnum and fminnum, these operations propagate nan and consider -0.0 to be less than +0.0. Without Zfa, we don't have a single instruction for this. The lowering I've used forces the other input to nan if one input is a nan. If both inputs are nan, they get swapped. We then use the fmax or fmin instruction. New ISD nodes are needed because fmaxnum/fminnum do not define the order of -0.0 and +0.0. This lowering ensures the snans are quieted (though that is probably not required in the default environment). It also ensures non-canonical nans are canonicalized, though I'm also not sure that's needed. Another option could be to use fmax/fmin and then overwrite the result based on the inputs being nan, but I'm not sure we can do that with any less code. Future work will handle the nonans FMF, and handle the case where we can prove the input isn't nan. This does fix the crash in #64022, but we need to do more work to avoid scalarization. Reviewed By: fakepaper56 Differential Revision: https://reviews.llvm.org/D156069
1 parent 6c48f57 commit 4942978

9 files changed

+521
-20
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
423423
// We need to custom promote this.
424424
if (Subtarget.is64Bit())
425425
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
426+
427+
if (!Subtarget.hasStdExtZfa())
428+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
426429
}
427430

428431
if (Subtarget.hasStdExtFOrZfinx()) {
@@ -445,6 +448,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
445448

446449
if (Subtarget.hasStdExtZfa())
447450
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
451+
else
452+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
448453
}
449454

450455
if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
@@ -461,6 +466,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
461466
} else {
462467
if (Subtarget.is64Bit())
463468
setOperationAction(FPRndMode, MVT::f64, Custom);
469+
470+
setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
464471
}
465472

466473
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
@@ -4624,6 +4631,34 @@ SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
46244631
ISD::CondCode::SETNE);
46254632
}
46264633

4634+
// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
4635+
// operations propagate nans.
//
// Op is the ISD::FMAXIMUM or ISD::FMINIMUM node being lowered, DAG is used to
// build the replacement nodes, and Subtarget supplies the XLen integer type
// used for the comparison results. Returns a RISCVISD::FMAX or RISCVISD::FMIN
// node whose operands have been adjusted so that a nan in either input reaches
// both operands.
4636+
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
4637+
const RISCVSubtarget &Subtarget) {
4638+
SDLoc DL(Op);
4639+
EVT VT = Op.getValueType();
4640+
4641+
SDValue X = Op.getOperand(0);
4642+
SDValue Y = Op.getOperand(1);
4643+
4644+
MVT XLenVT = Subtarget.getXLenVT();
4645+
4646+
// If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
4647+
// ensures that when one input is a nan, the other will also be a nan allowing
4648+
// the nan to propagate. If both inputs are nan, this will swap the inputs
4649+
// which is harmless.
4650+
// FIXME: Handle nonans FMF and use isKnownNeverNaN.
4651+
// An ordered-equal self-compare (SETOEQ) is false only when the operand is a
// nan, so each setcc below acts as an "is not nan" test.
SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
4652+
SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
4653+
4654+
// Both selects read the original X and Y, not the already-replaced values.
SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
4655+
SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
4656+
4657+
// Emit the RISC-V specific node rather than fmaxnum/fminnum, which do not
// define the order of -0.0 and +0.0.
unsigned Opc =
4658+
Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
4659+
return DAG.getNode(Opc, DL, VT, NewX, NewY);
4660+
}
4661+
46274662
/// Get a RISCV target specified VL op for a given SDNode.
46284663
static unsigned getRISCVVLOp(SDValue Op) {
46294664
#define OP_CASE(NODE) \
@@ -4948,6 +4983,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
49484983
}
49494984
return SDValue();
49504985
}
4986+
case ISD::FMAXIMUM:
4987+
case ISD::FMINIMUM:
4988+
return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
49514989
case ISD::FP_EXTEND: {
49524990
SDLoc DL(Op);
49534991
EVT VT = Op.getValueType();
@@ -16054,6 +16092,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
1605416092
NODE_NAME_CASE(FP_EXTEND_BF16)
1605516093
NODE_NAME_CASE(FROUND)
1605616094
NODE_NAME_CASE(FPCLASS)
16095+
NODE_NAME_CASE(FMAX)
16096+
NODE_NAME_CASE(FMIN)
1605716097
NODE_NAME_CASE(READ_CYCLE_WIDE)
1605816098
NODE_NAME_CASE(BREV8)
1605916099
NODE_NAME_CASE(ORC_B)

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ enum NodeType : unsigned {
122122
FROUND,
123123

124124
FPCLASS,
125+
126+
// Floating point fmax and fmin matching the RISC-V instruction semantics.
127+
FMAX, FMIN,
128+
125129
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
126130
// (returns (Lo, Hi)). It takes a chain operand.
127131
READ_CYCLE_WIDE,

llvm/lib/Target/RISCV/RISCVInstrInfoD.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,8 @@ def : Pat<(fneg (any_fma_nsz FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3))
386386
foreach Ext = DExts in {
387387
defm : PatFprFpr_m<fminnum, FMIN_D, Ext>;
388388
defm : PatFprFpr_m<fmaxnum, FMAX_D, Ext>;
389+
defm : PatFprFpr_m<riscv_fmin, FMIN_D, Ext>;
390+
defm : PatFprFpr_m<riscv_fmax, FMAX_D, Ext>;
389391
}
390392

391393
/// Setcc

llvm/lib/Target/RISCV/RISCVInstrInfoF.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ def riscv_fcvt_x
5151
def riscv_fcvt_xu
5252
: SDNode<"RISCVISD::FCVT_XU", SDT_RISCVFCVT_X>;
5353

54+
def riscv_fmin : SDNode<"RISCVISD::FMIN", SDTFPBinOp>;
55+
def riscv_fmax : SDNode<"RISCVISD::FMAX", SDTFPBinOp>;
56+
5457
def riscv_strict_fcvt_w_rv64
5558
: SDNode<"RISCVISD::STRICT_FCVT_W_RV64", SDT_RISCVFCVT_W_RV64,
5659
[SDNPHasChain]>;
@@ -555,6 +558,8 @@ def : Pat<(fneg (any_fma_nsz FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3)),
555558
foreach Ext = FExts in {
556559
defm : PatFprFpr_m<fminnum, FMIN_S, Ext>;
557560
defm : PatFprFpr_m<fmaxnum, FMAX_S, Ext>;
561+
defm : PatFprFpr_m<riscv_fmin, FMIN_S, Ext>;
562+
defm : PatFprFpr_m<riscv_fmax, FMAX_S, Ext>;
558563
}
559564

560565
/// Setcc

llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,8 @@ def : Pat<(fneg (any_fma_nsz FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3)),
348348
foreach Ext = ZfhExts in {
349349
defm : PatFprFpr_m<fminnum, FMIN_H, Ext>;
350350
defm : PatFprFpr_m<fmaxnum, FMAX_H, Ext>;
351+
defm : PatFprFpr_m<riscv_fmin, FMIN_H, Ext>;
352+
defm : PatFprFpr_m<riscv_fmax, FMAX_H, Ext>;
351353
}
352354

353355
/// Setcc

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -157,21 +157,21 @@ define void @maxnum() {
157157

158158
define void @minimum() {
159159
; CHECK-LABEL: 'minimum'
160-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef)
161-
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
162-
; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
163-
; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
164-
; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef)
160+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef)
161+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
162+
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
163+
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
164+
; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef)
165165
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
166166
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
167167
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
168168
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
169169
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
170-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.minimum.f64(double undef, double undef)
171-
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
172-
; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
173-
; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef)
174-
; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef)
170+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.minimum.f64(double undef, double undef)
171+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef)
172+
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef)
173+
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef)
174+
; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef)
175175
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
176176
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
177177
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
@@ -202,21 +202,21 @@ define void @minimum() {
202202

203203
define void @maximum() {
204204
; CHECK-LABEL: 'maximum'
205-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.maximum.f32(float undef, float undef)
206-
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef)
207-
; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
208-
; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
209-
; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef)
205+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.maximum.f32(float undef, float undef)
206+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef)
207+
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef)
208+
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef)
209+
; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef)
210210
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef)
211211
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef)
212212
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef)
213213
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef)
214214
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef)
215-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.maximum.f64(double undef, double undef)
216-
; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
217-
; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
218-
; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef)
219-
; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef)
215+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.maximum.f64(double undef, double undef)
216+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef)
217+
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef)
218+
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef)
219+
; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef)
220220
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef)
221221
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef)
222222
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef)
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \
3+
; RUN: -verify-machineinstrs -target-abi=ilp32d \
4+
; RUN: | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s
5+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \
6+
; RUN: -verify-machineinstrs -target-abi=lp64d \
7+
; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s
8+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zdinx \
9+
; RUN: -verify-machineinstrs -target-abi=ilp32 \
10+
; RUN: | FileCheck -check-prefix=RV32IZFINXZDINX %s
11+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zdinx \
12+
; RUN: -verify-machineinstrs -target-abi=lp64 \
13+
; RUN: | FileCheck -check-prefix=RV64IZFINXZDINX %s
14+
15+
declare double @llvm.minimum.f64(double, double)
16+
17+
define double @fminimum_f64(double %a, double %b) nounwind {
18+
; CHECKIFD-LABEL: fminimum_f64:
19+
; CHECKIFD: # %bb.0:
20+
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
21+
; CHECKIFD-NEXT: fmv.d fa5, fa1
22+
; CHECKIFD-NEXT: beqz a0, .LBB0_3
23+
; CHECKIFD-NEXT: # %bb.1:
24+
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
25+
; CHECKIFD-NEXT: beqz a0, .LBB0_4
26+
; CHECKIFD-NEXT: .LBB0_2:
27+
; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5
28+
; CHECKIFD-NEXT: ret
29+
; CHECKIFD-NEXT: .LBB0_3:
30+
; CHECKIFD-NEXT: fmv.d fa5, fa0
31+
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
32+
; CHECKIFD-NEXT: bnez a0, .LBB0_2
33+
; CHECKIFD-NEXT: .LBB0_4:
34+
; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5
35+
; CHECKIFD-NEXT: ret
36+
;
37+
; RV32IZFINXZDINX-LABEL: fminimum_f64:
38+
; RV32IZFINXZDINX: # %bb.0:
39+
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
40+
; RV32IZFINXZDINX-NEXT: sw a2, 8(sp)
41+
; RV32IZFINXZDINX-NEXT: sw a3, 12(sp)
42+
; RV32IZFINXZDINX-NEXT: lw a2, 8(sp)
43+
; RV32IZFINXZDINX-NEXT: lw a3, 12(sp)
44+
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
45+
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
46+
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
47+
; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
48+
; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0
49+
; RV32IZFINXZDINX-NEXT: mv a4, a2
50+
; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_2
51+
; RV32IZFINXZDINX-NEXT: # %bb.1:
52+
; RV32IZFINXZDINX-NEXT: mv a4, a0
53+
; RV32IZFINXZDINX-NEXT: .LBB0_2:
54+
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
55+
; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_4
56+
; RV32IZFINXZDINX-NEXT: # %bb.3:
57+
; RV32IZFINXZDINX-NEXT: mv a0, a2
58+
; RV32IZFINXZDINX-NEXT: .LBB0_4:
59+
; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
60+
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
61+
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
62+
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
63+
; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
64+
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
65+
; RV32IZFINXZDINX-NEXT: ret
66+
;
67+
; RV64IZFINXZDINX-LABEL: fminimum_f64:
68+
; RV64IZFINXZDINX: # %bb.0:
69+
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
70+
; RV64IZFINXZDINX-NEXT: mv a2, a1
71+
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_3
72+
; RV64IZFINXZDINX-NEXT: # %bb.1:
73+
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
74+
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_4
75+
; RV64IZFINXZDINX-NEXT: .LBB0_2:
76+
; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
77+
; RV64IZFINXZDINX-NEXT: ret
78+
; RV64IZFINXZDINX-NEXT: .LBB0_3:
79+
; RV64IZFINXZDINX-NEXT: mv a2, a0
80+
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
81+
; RV64IZFINXZDINX-NEXT: bnez a3, .LBB0_2
82+
; RV64IZFINXZDINX-NEXT: .LBB0_4:
83+
; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2
84+
; RV64IZFINXZDINX-NEXT: ret
85+
%1 = call double @llvm.minimum.f64(double %a, double %b)
86+
ret double %1
87+
}
88+
89+
declare double @llvm.maximum.f64(double, double)
90+
91+
define double @fmaximum_f64(double %a, double %b) nounwind {
92+
; CHECKIFD-LABEL: fmaximum_f64:
93+
; CHECKIFD: # %bb.0:
94+
; CHECKIFD-NEXT: feq.d a0, fa0, fa0
95+
; CHECKIFD-NEXT: fmv.d fa5, fa1
96+
; CHECKIFD-NEXT: beqz a0, .LBB1_3
97+
; CHECKIFD-NEXT: # %bb.1:
98+
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
99+
; CHECKIFD-NEXT: beqz a0, .LBB1_4
100+
; CHECKIFD-NEXT: .LBB1_2:
101+
; CHECKIFD-NEXT: fmax.d fa0, fa0, fa5
102+
; CHECKIFD-NEXT: ret
103+
; CHECKIFD-NEXT: .LBB1_3:
104+
; CHECKIFD-NEXT: fmv.d fa5, fa0
105+
; CHECKIFD-NEXT: feq.d a0, fa1, fa1
106+
; CHECKIFD-NEXT: bnez a0, .LBB1_2
107+
; CHECKIFD-NEXT: .LBB1_4:
108+
; CHECKIFD-NEXT: fmax.d fa0, fa1, fa5
109+
; CHECKIFD-NEXT: ret
110+
;
111+
; RV32IZFINXZDINX-LABEL: fmaximum_f64:
112+
; RV32IZFINXZDINX: # %bb.0:
113+
; RV32IZFINXZDINX-NEXT: addi sp, sp, -16
114+
; RV32IZFINXZDINX-NEXT: sw a2, 8(sp)
115+
; RV32IZFINXZDINX-NEXT: sw a3, 12(sp)
116+
; RV32IZFINXZDINX-NEXT: lw a2, 8(sp)
117+
; RV32IZFINXZDINX-NEXT: lw a3, 12(sp)
118+
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
119+
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
120+
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
121+
; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
122+
; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0
123+
; RV32IZFINXZDINX-NEXT: mv a4, a2
124+
; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_2
125+
; RV32IZFINXZDINX-NEXT: # %bb.1:
126+
; RV32IZFINXZDINX-NEXT: mv a4, a0
127+
; RV32IZFINXZDINX-NEXT: .LBB1_2:
128+
; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2
129+
; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_4
130+
; RV32IZFINXZDINX-NEXT: # %bb.3:
131+
; RV32IZFINXZDINX-NEXT: mv a0, a2
132+
; RV32IZFINXZDINX-NEXT: .LBB1_4:
133+
; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4
134+
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
135+
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
136+
; RV32IZFINXZDINX-NEXT: lw a0, 8(sp)
137+
; RV32IZFINXZDINX-NEXT: lw a1, 12(sp)
138+
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
139+
; RV32IZFINXZDINX-NEXT: ret
140+
;
141+
; RV64IZFINXZDINX-LABEL: fmaximum_f64:
142+
; RV64IZFINXZDINX: # %bb.0:
143+
; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0
144+
; RV64IZFINXZDINX-NEXT: mv a2, a1
145+
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_3
146+
; RV64IZFINXZDINX-NEXT: # %bb.1:
147+
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
148+
; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_4
149+
; RV64IZFINXZDINX-NEXT: .LBB1_2:
150+
; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2
151+
; RV64IZFINXZDINX-NEXT: ret
152+
; RV64IZFINXZDINX-NEXT: .LBB1_3:
153+
; RV64IZFINXZDINX-NEXT: mv a2, a0
154+
; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1
155+
; RV64IZFINXZDINX-NEXT: bnez a3, .LBB1_2
156+
; RV64IZFINXZDINX-NEXT: .LBB1_4:
157+
; RV64IZFINXZDINX-NEXT: fmax.d a0, a1, a2
158+
; RV64IZFINXZDINX-NEXT: ret
159+
%1 = call double @llvm.maximum.f64(double %a, double %b)
160+
ret double %1
161+
}
162+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
163+
; RV32IFD: {{.*}}
164+
; RV64IFD: {{.*}}

0 commit comments

Comments
 (0)