Skip to content

Commit 788592f

Browse files
authored
Merge pull request #14 from varkor/fmin-fmax-nan
[X86] Combine fminnum/fmaxnum with non-nan operand to fmin/fmax
2 parents 4efebe3 + 189dd9d commit 788592f

File tree

4 files changed

+100
-4
lines changed

4 files changed

+100
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+8 -4
Original file line number | Diff line number | Diff line change
@@ -38936,9 +38936,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
3893638936
if (Subtarget.useSoftFloat())
3893738937
return SDValue();
3893838938

38939-
// TODO: If an operand is already known to be a NaN or not a NaN, this
38940-
// should be an optional swap and FMAX/FMIN.
38941-
3894238939
EVT VT = N->getValueType(0);
3894338940
if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
3894438941
(Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
@@ -38955,6 +38952,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
3895538952
if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
3895638953
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
3895738954

38955+
// If one of the operands is known non-NaN use the native min/max instructions
38956+
// with the non-NaN input as second operand.
38957+
if (DAG.isKnownNeverNaN(Op1))
38958+
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
38959+
if (DAG.isKnownNeverNaN(Op0))
38960+
return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
38961+
3895838962
// If we have to respect NaN inputs, this takes at least 3 instructions.
3895938963
// Favor a library call when operating on a scalar and minimizing code size.
3896038964
if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize())
@@ -41549,7 +41553,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
4154941553
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
4155041554
case ISD::FMINNUM:
4155141555
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
41552-
case X86ISD::CVTSI2P:
41556+
case X86ISD::CVTSI2P:
4155341557
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
4155441558
case X86ISD::BT: return combineBT(N, DAG, DCI);
4155541559
case ISD::ANY_EXTEND:

llvm/test/CodeGen/X86/extract-fp.ll

+36
Original file line number | Diff line number | Diff line change
@@ -84,3 +84,39 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) {
8484
ret float %ext
8585
}
8686

87+
define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
88+
; CHECK-LABEL: ext_maxnum_v4f32:
89+
; CHECK: # %bb.0:
90+
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
91+
; CHECK-NEXT: maxss {{.*}}(%rip), %xmm0
92+
; CHECK-NEXT: retq
93+
%v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>)
94+
%r = extractelement <4 x float> %v, i32 2
95+
ret float %r
96+
}
97+
98+
define double @ext_minnum_v2f64(<2 x double> %x) nounwind {
99+
; CHECK-LABEL: ext_minnum_v2f64:
100+
; CHECK: # %bb.0:
101+
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
102+
; CHECK-NEXT: minsd {{.*}}(%rip), %xmm0
103+
; CHECK-NEXT: retq
104+
%v = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0.0, double 1.0>, <2 x double> %x)
105+
%r = extractelement <2 x double> %v, i32 1
106+
ret double %r
107+
}
108+
109+
;define double @ext_maximum_v2f64(<2 x double> %x) nounwind {
110+
; %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 42.0, double 43.0>)
111+
; %r = extractelement <2 x double> %v, i32 1
112+
; ret double %r
113+
;}
114+
115+
;define float @ext_minimum_v4f32(<4 x float> %x) nounwind {
116+
; %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 42.0>)
117+
; %r = extractelement <4 x float> %v, i32 1
118+
; ret float %r
119+
;}
120+
121+
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
122+
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)

llvm/test/CodeGen/X86/fmaxnum.ll

+28
Original file line number | Diff line number | Diff line change
@@ -349,5 +349,33 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double
349349
ret <2 x double> %r
350350
}
351351

352+
define float @test_maxnum_const_op1(float %x) {
353+
; SSE-LABEL: test_maxnum_const_op1:
354+
; SSE: # %bb.0:
355+
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
356+
; SSE-NEXT: retq
357+
;
358+
; AVX-LABEL: test_maxnum_const_op1:
359+
; AVX: # %bb.0:
360+
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
361+
; AVX-NEXT: retq
362+
%r = call float @llvm.maxnum.f32(float 1.0, float %x)
363+
ret float %r
364+
}
365+
366+
define float @test_maxnum_const_op2(float %x) {
367+
; SSE-LABEL: test_maxnum_const_op2:
368+
; SSE: # %bb.0:
369+
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
370+
; SSE-NEXT: retq
371+
;
372+
; AVX-LABEL: test_maxnum_const_op2:
373+
; AVX: # %bb.0:
374+
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
375+
; AVX-NEXT: retq
376+
%r = call float @llvm.maxnum.f32(float %x, float 1.0)
377+
ret float %r
378+
}
379+
352380
attributes #0 = { "no-nans-fp-math"="true" }
353381

llvm/test/CodeGen/X86/fminnum.ll

+28
Original file line number | Diff line number | Diff line change
@@ -341,5 +341,33 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float>
341341
ret <4 x float> %r
342342
}
343343

344+
define float @test_minnum_const_op1(float %x) {
345+
; SSE-LABEL: test_minnum_const_op1:
346+
; SSE: # %bb.0:
347+
; SSE-NEXT: minss {{.*}}(%rip), %xmm0
348+
; SSE-NEXT: retq
349+
;
350+
; AVX-LABEL: test_minnum_const_op1:
351+
; AVX: # %bb.0:
352+
; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
353+
; AVX-NEXT: retq
354+
%r = call float @llvm.minnum.f32(float 1.0, float %x)
355+
ret float %r
356+
}
357+
358+
define float @test_minnum_const_op2(float %x) {
359+
; SSE-LABEL: test_minnum_const_op2:
360+
; SSE: # %bb.0:
361+
; SSE-NEXT: minss {{.*}}(%rip), %xmm0
362+
; SSE-NEXT: retq
363+
;
364+
; AVX-LABEL: test_minnum_const_op2:
365+
; AVX: # %bb.0:
366+
; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
367+
; AVX-NEXT: retq
368+
%r = call float @llvm.minnum.f32(float %x, float 1.0)
369+
ret float %r
370+
}
371+
344372
attributes #0 = { "no-nans-fp-math"="true" }
345373

0 commit comments

Comments
 (0)