Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding more SIMD constant folding support #82190

Merged
merged 5 commits into from
Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 170 additions & 2 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,39 @@ struct simd32_t
}
};

//------------------------------------------------------------------------
// EvaluateUnaryScalarSpecialized: Evaluates a unary operation that is only
// meaningful on the integral representation of a value.
//
// Arguments:
//    oper - The operation to perform; only GT_NOT is handled here
//    arg0 - The operand
//
// Return Value:
//    The bitwise complement of arg0 for GT_NOT. Any other oper ends up in
//    unreached().
//
template <typename TBase>
TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
{
    if (oper == GT_NOT)
    {
        return ~arg0;
    }

    // No other unary operation is supported by this helper.
    unreached();
}

// Specialization for float: the operation is applied to the 32-bit pattern
// of the value (via the uint32_t instantiation) and the resulting bits are
// converted back to a float.
template <>
inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
{
    const uint32_t inputBits = BitOperations::SingleToUInt32Bits(arg0);
    return BitOperations::UInt32BitsToSingle(EvaluateUnaryScalarSpecialized<uint32_t>(oper, inputBits));
}

// Specialization for double: the operation is applied to the 64-bit pattern
// of the value (via the uint64_t instantiation) and the resulting bits are
// converted back to a double.
template <>
inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
{
    const uint64_t inputBits = BitOperations::DoubleToUInt64Bits(arg0);
    return BitOperations::UInt64BitsToDouble(EvaluateUnaryScalarSpecialized<uint64_t>(oper, inputBits));
}

template <typename TBase>
TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
{
Expand All @@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)

default:
{
unreached();
return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
}
}
}
Expand Down Expand Up @@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

//------------------------------------------------------------------------
// EvaluateBinaryScalarRSZ: Evaluates a right shift of arg0 by arg1.
//
// Arguments:
//    arg0 - The value to shift
//    arg1 - The shift amount; only its low bits are used, it is masked with
//           (bit width of TBase - 1) before shifting
//
// Return Value:
//    arg0 shifted right by the masked amount. For unsigned TBase this is a
//    zero-filling shift; the signed integer types have explicit
//    specializations that route through the matching unsigned type.
//
template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
    const auto shiftMask  = (sizeof(TBase) * 8) - 1;
    const auto shiftCount = arg1 & shiftMask;

    return arg0 >> shiftCount;
}

// Specialization for int8_t: both operands are converted to uint8_t so the
// shift is carried out by the unsigned instantiation, then the result is
// converted back to the signed type.
template <>
inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
{
    const uint8_t shifted =
        EvaluateBinaryScalarRSZ<uint8_t>(static_cast<uint8_t>(arg0), static_cast<uint8_t>(arg1));

    return static_cast<int8_t>(shifted);
}

// Specialization for int16_t: both operands are converted to uint16_t so the
// shift is carried out by the unsigned instantiation, then the result is
// converted back to the signed type.
template <>
inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
{
    const uint16_t shifted =
        EvaluateBinaryScalarRSZ<uint16_t>(static_cast<uint16_t>(arg0), static_cast<uint16_t>(arg1));

    return static_cast<int16_t>(shifted);
}

// Specialization for int32_t: both operands are converted to uint32_t so the
// shift is carried out by the unsigned instantiation, then the result is
// converted back to the signed type.
template <>
inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
{
    const uint32_t shifted =
        EvaluateBinaryScalarRSZ<uint32_t>(static_cast<uint32_t>(arg0), static_cast<uint32_t>(arg1));

    return static_cast<int32_t>(shifted);
}

// Specialization for int64_t: both operands are converted to uint64_t so the
// shift is carried out by the unsigned instantiation, then the result is
// converted back to the signed type.
template <>
inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
{
    const uint64_t shifted =
        EvaluateBinaryScalarRSZ<uint64_t>(static_cast<uint64_t>(arg0), static_cast<uint64_t>(arg1));

    return static_cast<int64_t>(shifted);
}

//------------------------------------------------------------------------
// EvaluateBinaryScalarSpecialized: Evaluates a binary operation that is only
// meaningful on the integral representation of its operands.
//
// Arguments:
//    oper - The operation to perform: GT_AND, GT_AND_NOT, GT_OR, GT_XOR,
//           GT_LSH, GT_RSH or GT_RSZ
//    arg0 - The first operand
//    arg1 - The second operand; for the shifts this is the shift amount
//
// Return Value:
//    The result of applying oper to arg0 and arg1. Any other oper ends up in
//    unreached().
//
// Notes:
//    GT_LSH and GT_RSH use the language's << and >> on TBase directly, with
//    the shift amount masked to (bit width of TBase - 1). GT_RSZ is forwarded
//    to EvaluateBinaryScalarRSZ.
//
template <typename TBase>
TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
{
    // Only the low bits of the shift amount participate in GT_LSH/GT_RSH.
    const auto shiftCount = arg1 & ((sizeof(TBase) * 8) - 1);

    switch (oper)
    {
        // Bitwise logic
        case GT_AND:
            return arg0 & arg1;

        case GT_AND_NOT:
            return arg0 & ~arg1;

        case GT_OR:
            return arg0 | arg1;

        case GT_XOR:
            return arg0 ^ arg1;

        // Shifts
        case GT_LSH:
            return arg0 << shiftCount;

        case GT_RSH:
            return arg0 >> shiftCount;

        case GT_RSZ:
            return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);

        default:
            unreached();
    }
}

// Specialization for float: both operands are reinterpreted as their 32-bit
// patterns, the operation is evaluated by the uint32_t instantiation, and the
// resulting bits are converted back to a float.
template <>
inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
{
    const uint32_t lhsBits = BitOperations::SingleToUInt32Bits(arg0);
    const uint32_t rhsBits = BitOperations::SingleToUInt32Bits(arg1);

    return BitOperations::UInt32BitsToSingle(EvaluateBinaryScalarSpecialized<uint32_t>(oper, lhsBits, rhsBits));
}

// Specialization for double: both operands are reinterpreted as their 64-bit
// patterns, the operation is evaluated by the uint64_t instantiation, and the
// resulting bits are converted back to a double.
template <>
inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
{
    const uint64_t lhsBits = BitOperations::DoubleToUInt64Bits(arg0);
    const uint64_t rhsBits = BitOperations::DoubleToUInt64Bits(arg1);

    return BitOperations::UInt64BitsToDouble(EvaluateBinaryScalarSpecialized<uint64_t>(oper, lhsBits, rhsBits));
}

template <typename TBase>
TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
{
Expand All @@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
return arg0 + arg1;
}

case GT_DIV:
{
return arg0 / arg1;
}

case GT_MUL:
{
return arg0 * arg1;
}

case GT_SUB:
{
return arg0 - arg1;
}

default:
{
unreached();
return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
}
}
}
Expand Down Expand Up @@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

//------------------------------------------------------------------------
// BroadcastConstantToSimd: Fills a SIMD value with copies of one element.
//
// Arguments:
//    result - The SIMD value to fill; written through its u8 byte view
//    arg0   - The element value to replicate
//
// Notes:
//    sizeof(TSimd) / sizeof(TBase) elements are written, each as a
//    sizeof(TBase)-byte memcpy at consecutive offsets into result->u8.
//
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
    const uint32_t elemSize  = sizeof(TBase);
    const uint32_t byteCount = (sizeof(TSimd) / sizeof(TBase)) * sizeof(TBase);

    for (uint32_t offset = 0; offset < byteCount; offset += elemSize)
    {
        // Copy the bytes rather than storing through a TBase* into the byte array.
        memcpy(&result->u8[offset], &arg0, elemSize);
    }
}

#ifdef FEATURE_SIMD

#ifdef TARGET_XARCH
Expand Down
64 changes: 64 additions & 0 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2645,6 +2645,22 @@ uint32_t BitOperations::BitScanReverse(uint64_t value)
#endif
}

//------------------------------------------------------------------------
// BitOperations::DoubleToUInt64Bits: Reinterprets a double-precision floating-point value as a 64-bit
// unsigned integer.
//
// Arguments:
//    value - The floating-point value whose bits are wanted
//
// Return Value:
//    A uint64_t holding the same bytes as value.
//
uint64_t BitOperations::DoubleToUInt64Bits(double value)
{
    // Copy the bytes across; no numeric conversion takes place.
    uint64_t bits;
    memcpy(&bits, &value, sizeof(value));
    return bits;
}

//------------------------------------------------------------------------
// BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask.
//
Expand Down Expand Up @@ -2932,6 +2948,22 @@ uint64_t BitOperations::RotateRight(uint64_t value, uint32_t offset)
return (value >> (offset & 0x3F)) | (value << ((64 - offset) & 0x3F));
}

//------------------------------------------------------------------------
// BitOperations::SingleToUInt32Bits: Reinterprets a single-precision floating-point value as a 32-bit
// unsigned integer.
//
// Arguments:
//    value - The floating-point value whose bits are wanted
//
// Return Value:
//    A uint32_t holding the same bytes as value.
//
uint32_t BitOperations::SingleToUInt32Bits(float value)
{
    // Copy the bytes across; no numeric conversion takes place.
    uint32_t bits;
    memcpy(&bits, &value, sizeof(value));
    return bits;
}

//------------------------------------------------------------------------
// BitOperations::TrailingZeroCount: Count the number of trailing zero bits in an integer value.
//
Expand Down Expand Up @@ -2980,6 +3012,38 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value)
#endif
}

//------------------------------------------------------------------------
// BitOperations::UInt32BitsToSingle: Reinterprets a 32-bit unsigned integer as a single-precision
// floating-point value.
//
// Arguments:
//    value - The bit pattern to reinterpret
//
// Return Value:
//    A float holding the same bytes as value.
//
float BitOperations::UInt32BitsToSingle(uint32_t value)
{
    // Copy the bytes across; no numeric conversion takes place.
    float single;
    memcpy(&single, &value, sizeof(value));
    return single;
}

//------------------------------------------------------------------------
// BitOperations::UInt64BitsToDouble: Reinterprets a 64-bit unsigned integer as a double-precision
// floating-point value.
//
// Arguments:
//    value - The bit pattern to reinterpret
//
// Return Value:
//    A double holding the same bytes as value.
//
double BitOperations::UInt64BitsToDouble(uint64_t value)
{
    // Copy the bytes across; no numeric conversion takes place.
    double dbl;
    memcpy(&dbl, &value, sizeof(value));
    return dbl;
}

namespace MagicDivide
{
template <int TableBase = 0, int TableSize, typename Magic>
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ class BitOperations

static uint32_t BitScanReverse(uint64_t value);

static uint64_t DoubleToUInt64Bits(double value);

static uint32_t LeadingZeroCount(uint32_t value);

static uint32_t LeadingZeroCount(uint64_t value);
Expand All @@ -775,9 +777,15 @@ class BitOperations

static uint64_t RotateRight(uint64_t value, uint32_t offset);

static uint32_t SingleToUInt32Bits(float value);

static uint32_t TrailingZeroCount(uint32_t value);

static uint32_t TrailingZeroCount(uint64_t value);

static float UInt32BitsToSingle(uint32_t value);

static double UInt64BitsToDouble(uint64_t value);
};

// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but
Expand Down
Loading