Skip to content

Commit

Permalink
Adding more SIMD constant folding support
Browse files Browse the repository at this point in the history
  • Loading branch information
tannergooding committed Feb 17, 2023
1 parent 2410737 commit 2171541
Show file tree
Hide file tree
Showing 3 changed files with 1,117 additions and 94 deletions.
172 changes: 170 additions & 2 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,39 @@ struct simd32_t
}
};

// Evaluates a unary operator that is only meaningful on the bit pattern of
// an integral TBase.
//
// Arguments:
//    oper - the operator to apply; only GT_NOT is handled here
//    arg0 - the operand
//
// Return Value:
//    The bitwise complement of arg0 for GT_NOT. Any other operator ends up
//    in unreached().
template <typename TBase>
TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
{
    if (oper == GT_NOT)
    {
        return ~arg0;
    }

    unreached();
}

// float specialization: the handled unary operators act on raw bits, so the
// operand's 32-bit pattern is handed to the uint32_t implementation and the
// resulting pattern is turned back into a float.
//
// Arguments:
//    oper - the operator to apply (forwarded unchanged)
//    arg0 - the operand whose bit pattern is operated on
//
// Return Value:
//    A float carrying the bit pattern produced by the uint32_t evaluation.
template <>
inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
{
    // Copy the bytes instead of dereferencing a reinterpret_cast'ed pointer:
    // reading a float object through a uint32_t lvalue breaks the aliasing rules.
    uint32_t arg0Bits;
    memcpy(&arg0Bits, &arg0, sizeof(arg0Bits));

    uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);

    float result;
    memcpy(&result, &resultBits, sizeof(result));
    return result;
}

// double specialization: the handled unary operators act on raw bits, so the
// operand's 64-bit pattern is handed to the uint64_t implementation and the
// resulting pattern is turned back into a double.
//
// Arguments:
//    oper - the operator to apply (forwarded unchanged)
//    arg0 - the operand whose bit pattern is operated on
//
// Return Value:
//    A double carrying the bit pattern produced by the uint64_t evaluation.
template <>
inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
{
    // Copy the bytes instead of dereferencing a reinterpret_cast'ed pointer:
    // reading a double object through a uint64_t lvalue breaks the aliasing rules.
    uint64_t arg0Bits;
    memcpy(&arg0Bits, &arg0, sizeof(arg0Bits));

    uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);

    double result;
    memcpy(&result, &resultBits, sizeof(result));
    return result;
}

template <typename TBase>
TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
{
Expand All @@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)

default:
{
unreached();
return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
}
}
}
Expand Down Expand Up @@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

// Generic right shift used for GT_RSZ.
//
// Arguments:
//    arg0 - the value to shift
//    arg1 - the shift count; only its low bits are used, so the effective
//           count is always below the bit width of TBase
//
// Return Value:
//    arg0 shifted right by the masked count using the built-in `>>`.
//
// Notes:
//    The signed integer types have explicit specializations that perform
//    the shift on the matching unsigned type.
template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
    const auto shiftMask  = (sizeof(TBase) * 8) - 1;
    const auto shiftCount = arg1 & shiftMask;

    return arg0 >> shiftCount;
}

// int8_t specialization: both operands are converted to uint8_t and the
// shift is done by the uint8_t instantiation, so the operation runs on an
// unsigned value; the result is converted back to int8_t.
template <>
inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
{
    const uint8_t value = static_cast<uint8_t>(arg0);
    const uint8_t count = static_cast<uint8_t>(arg1);

    return static_cast<int8_t>(EvaluateBinaryScalarRSZ<uint8_t>(value, count));
}

// int16_t specialization: both operands are converted to uint16_t and the
// shift is done by the uint16_t instantiation, so the operation runs on an
// unsigned value; the result is converted back to int16_t.
template <>
inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
{
    const uint16_t value = static_cast<uint16_t>(arg0);
    const uint16_t count = static_cast<uint16_t>(arg1);

    return static_cast<int16_t>(EvaluateBinaryScalarRSZ<uint16_t>(value, count));
}

// int32_t specialization: both operands are converted to uint32_t and the
// shift is done by the uint32_t instantiation, so the operation runs on an
// unsigned value; the result is converted back to int32_t.
template <>
inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
{
    const uint32_t value = static_cast<uint32_t>(arg0);
    const uint32_t count = static_cast<uint32_t>(arg1);

    return static_cast<int32_t>(EvaluateBinaryScalarRSZ<uint32_t>(value, count));
}

// int64_t specialization: both operands are converted to uint64_t and the
// shift is done by the uint64_t instantiation, so the operation runs on an
// unsigned value; the result is converted back to int64_t.
template <>
inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
{
    const uint64_t value = static_cast<uint64_t>(arg0);
    const uint64_t count = static_cast<uint64_t>(arg1);

    return static_cast<int64_t>(EvaluateBinaryScalarRSZ<uint64_t>(value, count));
}

// Evaluates a binary operator that works on the bit pattern of an integral
// TBase: the bitwise operators and the shifts.
//
// Arguments:
//    oper - the operator to apply
//    arg0 - the left operand
//    arg1 - the right operand (the shift count for the shift operators)
//
// Return Value:
//    The result of applying oper to arg0 and arg1. An operator not listed
//    below ends up in unreached().
template <typename TBase>
TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
{
    // GT_LSH and GT_RSH only look at the low bits of the count, keeping the
    // effective shift below the bit width of TBase.
    const auto shiftCount = arg1 & ((sizeof(TBase) * 8) - 1);

    switch (oper)
    {
        case GT_AND:
            return arg0 & arg1;

        case GT_AND_NOT:
            return arg0 & ~arg1;

        case GT_LSH:
            return arg0 << shiftCount;

        case GT_OR:
            return arg0 | arg1;

        case GT_RSH:
            return arg0 >> shiftCount;

        case GT_RSZ:
            // Masks the count itself, so it gets the unmodified arg1.
            return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);

        case GT_XOR:
            return arg0 ^ arg1;

        default:
            unreached();
    }
}

// float specialization: the handled binary operators act on raw bits, so
// the 32-bit patterns of both operands are handed to the uint32_t
// implementation and the resulting pattern is turned back into a float.
//
// Arguments:
//    oper - the operator to apply (forwarded unchanged)
//    arg0 - the left operand whose bit pattern is operated on
//    arg1 - the right operand whose bit pattern is operated on
//
// Return Value:
//    A float carrying the bit pattern produced by the uint32_t evaluation.
template <>
inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
{
    // Copy the bytes instead of dereferencing reinterpret_cast'ed pointers:
    // reading a float object through a uint32_t lvalue breaks the aliasing rules.
    uint32_t arg0Bits;
    uint32_t arg1Bits;
    memcpy(&arg0Bits, &arg0, sizeof(arg0Bits));
    memcpy(&arg1Bits, &arg1, sizeof(arg1Bits));

    uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);

    float result;
    memcpy(&result, &resultBits, sizeof(result));
    return result;
}

// double specialization: the handled binary operators act on raw bits, so
// the 64-bit patterns of both operands are handed to the uint64_t
// implementation and the resulting pattern is turned back into a double.
//
// Arguments:
//    oper - the operator to apply (forwarded unchanged)
//    arg0 - the left operand whose bit pattern is operated on
//    arg1 - the right operand whose bit pattern is operated on
//
// Return Value:
//    A double carrying the bit pattern produced by the uint64_t evaluation.
template <>
inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
{
    // Copy the bytes instead of dereferencing reinterpret_cast'ed pointers:
    // reading a double object through a uint64_t lvalue breaks the aliasing rules.
    uint64_t arg0Bits;
    uint64_t arg1Bits;
    memcpy(&arg0Bits, &arg0, sizeof(arg0Bits));
    memcpy(&arg1Bits, &arg1, sizeof(arg1Bits));

    uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);

    double result;
    memcpy(&result, &resultBits, sizeof(result));
    return result;
}

template <typename TBase>
TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
{
Expand All @@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
return arg0 + arg1;
}

case GT_DIV:
{
return arg0 / arg1;
}

case GT_MUL:
{
return arg0 * arg1;
}

case GT_SUB:
{
return arg0 - arg1;
}

default:
{
unreached();
return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
}
}
}
Expand Down Expand Up @@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}

// Fills a SIMD value with repeated copies of one scalar.
//
// Arguments:
//    result - the SIMD value to fill; written through its `u8` byte array
//    arg0   - the scalar to replicate
//
// Notes:
//    sizeof(TSimd) / sizeof(TBase) copies are written back to back starting
//    at byte 0. Bytes past the last whole element are left untouched.
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
    const size_t elemSize = sizeof(TBase);
    const size_t simdSize = sizeof(TSimd);

    // Step through byte offsets for as long as a whole element still fits.
    for (size_t offset = 0; (offset + elemSize) <= simdSize; offset += elemSize)
    {
        // memcpy performs the element store without requiring the
        // destination bytes to be accessed as a TBase object.
        memcpy(&result->u8[offset], &arg0, elemSize);
    }
}

#ifdef FEATURE_SIMD

#ifdef TARGET_XARCH
Expand Down
Loading

0 comments on commit 2171541

Please sign in to comment.