Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization. Use Min/Max intrinsics if one of arguments is constant. #69434

Merged
merged 13 commits into from
Jun 23, 2022
38 changes: 38 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1731,7 +1731,9 @@ struct GenTree
#endif // DEBUG

inline bool IsIntegralConst(ssize_t constVal) const;
inline bool IsFloatNaN() const;
inline bool IsFloatPositiveZero() const;
inline bool IsFloatNegativeZero() const;
inline bool IsVectorZero() const;
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorConst();
Expand Down Expand Up @@ -8281,6 +8283,42 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const
return false;
}

//-------------------------------------------------------------------
// IsFloatNaN: returns true if this is exactly a const float value of NaN
//
// Returns:
// True if this represents a const floating-point value of NaN.
// Will return false otherwise.
//
inline bool GenTree::IsFloatNaN() const
{
if (IsCnsFltOrDbl())
{
double constValue = AsDblCon()->gtDconVal;
return FloatingPointUtils::isNaN(constValue);
}

return false;
}

//-------------------------------------------------------------------
// IsFloatNegativeZero: returns true if this is exactly a const float value of negative zero (-0.0)
//
// Returns:
// True if this represents a const floating-point value of exactly negative zero (-0.0).
// Will return false if the value is negative zero (+0.0).
//
inline bool GenTree::IsFloatNegativeZero() const
{
if (IsCnsFltOrDbl())
{
double constValue = AsDblCon()->gtDconVal;
return FloatingPointUtils::isNegativeZero(constValue);
}

return false;
}

//-------------------------------------------------------------------
// IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
//
Expand Down
153 changes: 152 additions & 1 deletion src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4482,13 +4482,164 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
case NI_System_Math_Log:
case NI_System_Math_Log2:
case NI_System_Math_Log10:
#ifdef TARGET_ARM64
{
retNode = impMathIntrinsic(method, sig, callType, ni, tailCall);
break;
}
#if defined(TARGET_ARM64)
// ARM64 has fmax/fmin which are IEEE754:2019 minimum/maximum compatible
// TODO-XARCH-CQ: Enable this for XARCH when one of the arguments is a constant
// so we can then emit maxss/minss and avoid NaN/-0.0 handling
case NI_System_Math_Max:
case NI_System_Math_Min:
#endif

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
case NI_System_Math_Max:
case NI_System_Math_Min:
{
assert(varTypeIsFloating(callType));
assert(sig->numArgs == 2);

GenTreeDblCon* cnsNode = nullptr;
GenTree* otherNode = nullptr;

GenTree* op2 = impStackTop().val;
GenTree* op1 = impStackTop(1).val;

if (op2->IsCnsFltOrDbl())
{
cnsNode = op2->AsDblCon();
otherNode = op1;
}
else if (op1->IsCnsFltOrDbl())
{
cnsNode = op1->AsDblCon();
otherNode = op2;
}

if (cnsNode == nullptr)
{
// no constant node, nothing to do
break;
}

if (otherNode->IsCnsFltOrDbl())
{
// both are constant, we can fold this operation completely. Pop both peeked values

if (ni == NI_System_Math_Max)
{
cnsNode->gtDconVal =
FloatingPointUtils::maximum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
}
else
{
assert(ni == NI_System_Math_Min);
cnsNode->gtDconVal =
FloatingPointUtils::minimum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
}

retNode = cnsNode;
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

impPopStack();
impPopStack();
DEBUG_DESTROY_NODE(otherNode);

break;
}

// only one is constant, we can fold in specialized scenarios

if (cnsNode->IsFloatNaN())
{
impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
"spill side effects before propagating NaN"));

// maxsd, maxss, minsd, and minss all return op2 if either is NaN
// we require NaN to be propagated so ensure the known NaN is op2

impPopStack();
impPopStack();
DEBUG_DESTROY_NODE(otherNode);

retNode = cnsNode;
break;
}

if (ni == NI_System_Math_Max)
{
// maxsd, maxss return op2 if both inputs are 0 of either sign
// we require +0 to be greater than -0, so we can't handle if
// the known constant is +0. This is because if the unknown value
// is -0, we'd need the cns to be op2. But if the unknown value
// is NaN, we'd need the cns to be op1 instead.

if (cnsNode->IsFloatPositiveZero())
{
break;
}

// Given the checks, op1 can safely be the cns and op2 the other node

ni = (callType == TYP_DOUBLE) ? NI_SSE2_Max : NI_SSE_Max;

// one is constant and we know its something we can handle, so pop both peeked values

op1 = cnsNode;
op2 = otherNode;
}
else
{
assert(ni == NI_System_Math_Min);

// minsd, minss return op2 if both inputs are 0 of either sign
// we require -0 to be lesser than +0, so we can't handle if
// the known constant is -0. This is because if the unknown value
// is +0, we'd need the cns to be op2. But if the unknown value
// is NaN, we'd need the cns to be op1 instead.

if (cnsNode->IsFloatNegativeZero())
{
break;
}

// Given the checks, op1 can safely be the cns and op2 the other node

ni = (callType == TYP_DOUBLE) ? NI_SSE2_Min : NI_SSE_Min;

// one is constant and we know its something we can handle, so pop both peeked values

op1 = cnsNode;
op2 = otherNode;
}

assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl());

impPopStack();
impPopStack();

GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16, callJitType);

if (callJitType == CORINFO_TYPE_FLOAT)
{
vecCon->gtSimd16Val.f32[0] = (float)op1->AsDblCon()->gtDconVal;
}
else
{
vecCon->gtSimd16Val.f64[0] = op1->AsDblCon()->gtDconVal;
}

op1 = vecCon;
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_CreateScalarUnsafe, callJitType, 16);

retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, ni, callJitType, 16);
retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector128_ToScalar, callJitType, 16);

break;
}
#endif

case NI_System_Math_Pow:
case NI_System_Math_Round:
case NI_System_Math_Sin:
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2294,6 +2294,22 @@ bool FloatingPointUtils::isNaN(double val)
return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL;
}

//------------------------------------------------------------------------
// isNegativeZero: Determines whether the specified value is negative zero (-0.0)
//
// Arguments:
// val - value to check for (-0.0)
//
// Return Value:
// True if val is (-0.0)
//

bool FloatingPointUtils::isNegativeZero(double val)
{
UINT64 bits = *reinterpret_cast<UINT64*>(&val);
return bits == 0x8000000000000000ULL;
}

//------------------------------------------------------------------------
// maximum: This matches the IEEE 754:2019 `maximum` function
// It propagates NaN inputs back to the caller and
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,8 @@ class FloatingPointUtils

static bool isNaN(double val);

static bool isNegativeZero(double val);

static double maximum(double val1, double val2);

static float maximum(float val1, float val2);
Expand Down