Merge pull request #14 from varkor/fmin-fmax-nan

alexcrichton · web-flow · commit 788592fb2740 · 2019-05-30T17:28:31.000-05:00
[X86] Combine fminnum/fmaxnum with non-nan operand to fmin/fmax
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38936,9 +38936,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
   if (Subtarget.useSoftFloat())
     return SDValue();
 
-  // TODO: If an operand is already known to be a NaN or not a NaN, this
-  //       should be an optional swap and FMAX/FMIN.
-
   EVT VT = N->getValueType(0);
   if (!((Subtarget.hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
         (Subtarget.hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
@@ -38955,6 +38952,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
   if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
     return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
 
+  // If one operand is known never-NaN, use the native min/max with it as the
+  // second operand: x86 returns the second operand when either input is NaN.
+  if (DAG.isKnownNeverNaN(Op1))
+    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
+  if (DAG.isKnownNeverNaN(Op0))
+    return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
+
   // If we have to respect NaN inputs, this takes at least 3 instructions.
   // Favor a library call when operating on a scalar and minimizing code size.
   if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize())
@@ -41549,7 +41553,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FMAX:        return combineFMinFMax(N, DAG);
   case ISD::FMINNUM:
   case ISD::FMAXNUM:        return combineFMinNumFMaxNum(N, DAG, Subtarget);
-  case X86ISD::CVTSI2P:  
+  case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P:     return combineX86INT_TO_FP(N, DAG, DCI);
   case X86ISD::BT:          return combineBT(N, DAG, DCI);
   case ISD::ANY_EXTEND:
diff --git a/llvm/test/CodeGen/X86/extract-fp.ll b/llvm/test/CodeGen/X86/extract-fp.ll
@@ -84,3 +84,39 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) {
   ret float %ext
 }
 
+define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
+; CHECK-LABEL: ext_maxnum_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    maxss {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+  %v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>)
+  %r = extractelement <4 x float> %v, i32 2
+  ret float %r
+}
+
+define double @ext_minnum_v2f64(<2 x double> %x) nounwind {
+; CHECK-LABEL: ext_minnum_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT:    minsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+  %v = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0.0, double 1.0>, <2 x double> %x)
+  %r = extractelement <2 x double> %v, i32 1
+  ret double %r
+}
+
+;define double @ext_maximum_v2f64(<2 x double> %x) nounwind {
+;  %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 42.0, double 43.0>)
+;  %r = extractelement <2 x double> %v, i32 1
+;  ret double %r
+;}
+
+;define float @ext_minimum_v4f32(<4 x float> %x) nounwind {
+;  %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 42.0>)
+;  %r = extractelement <4 x float> %v, i32 1
+;  ret float %r
+;}
+
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -349,5 +349,33 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double
   ret <2 x double> %r
 }
 
+define float @test_maxnum_const_op1(float %x) {
+; SSE-LABEL: test_maxnum_const_op1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    maxss {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_maxnum_const_op1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmaxss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %r = call float @llvm.maxnum.f32(float 1.0, float %x)
+  ret float %r
+}
+
+define float @test_maxnum_const_op2(float %x) {
+; SSE-LABEL: test_maxnum_const_op2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    maxss {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_maxnum_const_op2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmaxss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %r = call float @llvm.maxnum.f32(float %x, float 1.0)
+  ret float %r
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }
 
diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
@@ -341,5 +341,33 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float>
   ret <4 x float> %r
 }
 
+define float @test_minnum_const_op1(float %x) {
+; SSE-LABEL: test_minnum_const_op1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    minss {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_minnum_const_op1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %r = call float @llvm.minnum.f32(float 1.0, float %x)
+  ret float %r
+}
+
+define float @test_minnum_const_op2(float %x) {
+; SSE-LABEL: test_minnum_const_op2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    minss {{.*}}(%rip), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_minnum_const_op2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vminss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %r = call float @llvm.minnum.f32(float %x, float 1.0)
+  ret float %r
+}
+
 attributes #0 = { "no-nans-fp-math"="true" }