
Update HWIntrinsicInfo::lookupId to use a binary search #103778

Merged · 9 commits · Jun 24, 2024
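In outline, the change replaces a linear scan over every hardware intrinsic with an O(1) index into a per-ISA range table, followed by a binary search by method name within that ISA's contiguous, name-sorted block. A minimal standalone sketch of that shape (the types, table contents, and two toy ISAs below are invented for illustration; only the algorithm mirrors the PR):

// sketch.cpp: standalone illustration of the new lookup shape. The enum
// values, table contents, and "ISA 1"/"ISA 2" are invented for the example.
#include <cassert>
#include <cstddef>
#include <cstring>

enum NamedIntrinsic : size_t
{
    NI_Illegal = 0, // entry 0 stands in for NI_Illegal; real ids start at 1,
                    // so the midpoint math below never underflows
    NI_A_Add,
    NI_A_Load,
    NI_A_Store,
    NI_B_Max,
    NI_B_Min
};

struct IntrinsicInfo
{
    NamedIntrinsic id;
    const char*    name;
};

// Names are sorted within each ISA's contiguous block, which is exactly the
// invariant the PR's debug-only validation asserts.
static const IntrinsicInfo infoTable[] = {
    {NI_Illegal, ""},
    {NI_A_Add, "Add"}, {NI_A_Load, "Load"}, {NI_A_Store, "Store"}, // ISA 1
    {NI_B_Max, "Max"}, {NI_B_Min, "Min"},                          // ISA 2
};

struct IsaRange
{
    NamedIntrinsic FirstId;
    NamedIntrinsic LastId;
};

static const IsaRange isaRanges[] = {
    {NI_A_Add, NI_A_Store}, // ISA 1
    {NI_B_Max, NI_B_Min},   // ISA 2
};

static NamedIntrinsic lookupId(size_t isa, const char* methodName)
{
    // O(1): the ISA id indexes straight into the range table
    const IsaRange& range = isaRanges[isa - 1];

    if (range.FirstId == NI_Illegal)
    {
        return NI_Illegal; // this ISA exposes no intrinsics
    }

    // binary search by name, replacing the old scan over every intrinsic
    size_t lower = range.FirstId;
    size_t upper = range.LastId;

    while (lower <= upper)
    {
        size_t mid = (upper + lower) / 2;
        int    cmp = strcmp(methodName, infoTable[mid].name);

        if (cmp < 0)
        {
            upper = mid - 1;
        }
        else if (cmp > 0)
        {
            lower = mid + 1;
        }
        else
        {
            return infoTable[mid].id;
        }
    }

    return NI_Illegal;
}

int main()
{
    assert(lookupId(1, "Load") == NI_A_Load);
    assert(lookupId(2, "Min") == NI_B_Min);
    assert(lookupId(2, "Load") == NI_Illegal); // name from the wrong ISA
    return 0;
}

Keeping one range per ISA means an unsupported or intrinsic-free ISA bails out before any string comparison, and each search touches only that ISA's slice of the table.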
289 changes: 269 additions & 20 deletions src/coreclr/jit/hwintrinsic.cpp
@@ -469,6 +469,213 @@ CorInfoType Compiler::getBaseJitTypeFromArgIfNeeded(NamedIntrinsic intrins
return (diffInsCount >= 2);
}

struct HWIntrinsicIsaRange
{
NamedIntrinsic FirstId;
NamedIntrinsic LastId;
};

static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
// clang-format off
#if defined(TARGET_XARCH)
{ FIRST_NI_X86Base, LAST_NI_X86Base },
{ FIRST_NI_SSE, LAST_NI_SSE },
{ FIRST_NI_SSE2, LAST_NI_SSE2 },
{ FIRST_NI_SSE3, LAST_NI_SSE3 },
{ FIRST_NI_SSSE3, LAST_NI_SSSE3 },
{ FIRST_NI_SSE41, LAST_NI_SSE41 },
{ FIRST_NI_SSE42, LAST_NI_SSE42 },
{ FIRST_NI_AVX, LAST_NI_AVX },
{ FIRST_NI_AVX2, LAST_NI_AVX2 },
{ FIRST_NI_AES, LAST_NI_AES },
{ FIRST_NI_BMI1, LAST_NI_BMI1 },
{ FIRST_NI_BMI2, LAST_NI_BMI2 },
{ FIRST_NI_FMA, LAST_NI_FMA },
{ FIRST_NI_LZCNT, LAST_NI_LZCNT },
{ FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ },
{ FIRST_NI_POPCNT, LAST_NI_POPCNT },
{ FIRST_NI_Vector128, LAST_NI_Vector128 },
{ FIRST_NI_Vector256, LAST_NI_Vector256 },
{ FIRST_NI_Vector512, LAST_NI_Vector512 },
{ FIRST_NI_AVXVNNI, LAST_NI_AVXVNNI },
{ NI_Illegal, NI_Illegal }, // MOVBE
{ FIRST_NI_X86Serialize, LAST_NI_X86Serialize },
{ NI_Illegal, NI_Illegal }, // EVEX
{ FIRST_NI_AVX512F, LAST_NI_AVX512F },
{ FIRST_NI_AVX512F_VL, LAST_NI_AVX512F_VL },
{ FIRST_NI_AVX512BW, LAST_NI_AVX512BW },
{ FIRST_NI_AVX512BW_VL, LAST_NI_AVX512BW_VL },
{ FIRST_NI_AVX512CD, LAST_NI_AVX512CD },
{ FIRST_NI_AVX512CD_VL, LAST_NI_AVX512CD_VL },
{ FIRST_NI_AVX512DQ, LAST_NI_AVX512DQ },
{ FIRST_NI_AVX512DQ_VL, LAST_NI_AVX512DQ_VL },
{ FIRST_NI_AVX512VBMI, LAST_NI_AVX512VBMI },
{ FIRST_NI_AVX512VBMI_VL, LAST_NI_AVX512VBMI_VL },
{ FIRST_NI_AVX10v1, LAST_NI_AVX10v1 },
{ FIRST_NI_AVX10v1_V512, LAST_NI_AVX10v1_V512 },
{ NI_Illegal, NI_Illegal }, // VectorT128
{ NI_Illegal, NI_Illegal }, // VectorT256
{ NI_Illegal, NI_Illegal }, // VectorT512
{ FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },
{ FIRST_NI_SSE_X64, LAST_NI_SSE_X64 },
{ FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 },
{ NI_Illegal, NI_Illegal }, // SSE3_X64
{ NI_Illegal, NI_Illegal }, // SSSE3_X64
{ FIRST_NI_SSE41_X64, LAST_NI_SSE41_X64 },
{ FIRST_NI_SSE42_X64, LAST_NI_SSE42_X64 },
{ NI_Illegal, NI_Illegal }, // AVX_X64
{ NI_Illegal, NI_Illegal }, // AVX2_X64
{ NI_Illegal, NI_Illegal }, // AES_X64
{ FIRST_NI_BMI1_X64, LAST_NI_BMI1_X64 },
{ FIRST_NI_BMI2_X64, LAST_NI_BMI2_X64 },
{ NI_Illegal, NI_Illegal }, // FMA_X64
{ FIRST_NI_LZCNT_X64, LAST_NI_LZCNT_X64 },
{ NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64
{ FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 },
{ NI_Illegal, NI_Illegal }, // AVXVNNI_X64
{ NI_Illegal, NI_Illegal }, // MOVBE_X64
{ NI_Illegal, NI_Illegal }, // X86Serialize_X64
{ NI_Illegal, NI_Illegal }, // EVEX_X64
{ FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 },
{ NI_Illegal, NI_Illegal }, // AVX512F_VL_X64
{ NI_Illegal, NI_Illegal }, // AVX512BW_X64
{ NI_Illegal, NI_Illegal }, // AVX512BW_VL_X64
{ NI_Illegal, NI_Illegal }, // AVX512CD_X64
{ NI_Illegal, NI_Illegal }, // AVX512CD_VL_X64
{ NI_Illegal, NI_Illegal }, // AVX512DQ_X64
{ NI_Illegal, NI_Illegal }, // AVX512DQ_VL_X64
{ NI_Illegal, NI_Illegal }, // AVX512VBMI_X64
{ NI_Illegal, NI_Illegal }, // AVX512VBMI_VL_X64
{ FIRST_NI_AVX10v1_X64, LAST_NI_AVX10v1_X64 },
{ NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64
#elif defined(TARGET_ARM64)
{ FIRST_NI_ArmBase, LAST_NI_ArmBase },
{ FIRST_NI_AdvSimd, LAST_NI_AdvSimd },
{ FIRST_NI_Aes, LAST_NI_Aes },
{ FIRST_NI_Crc32, LAST_NI_Crc32 },
{ FIRST_NI_Dp, LAST_NI_Dp },
{ FIRST_NI_Rdm, LAST_NI_Rdm },
{ FIRST_NI_Sha1, LAST_NI_Sha1 },
{ FIRST_NI_Sha256, LAST_NI_Sha256 },
{ NI_Illegal, NI_Illegal }, // Atomics
{ FIRST_NI_Vector64, LAST_NI_Vector64 },
{ FIRST_NI_Vector128, LAST_NI_Vector128 },
{ NI_Illegal, NI_Illegal }, // Dczva
{ NI_Illegal, NI_Illegal }, // Rcpc
{ NI_Illegal, NI_Illegal }, // VectorT128
{ NI_Illegal, NI_Illegal }, // Rcpc2
{ FIRST_NI_Sve, LAST_NI_Sve },
{ FIRST_NI_ArmBase_Arm64, LAST_NI_ArmBase_Arm64 },
{ FIRST_NI_AdvSimd_Arm64, LAST_NI_AdvSimd_Arm64 },
{ NI_Illegal, NI_Illegal }, // Aes_Arm64
{ FIRST_NI_Crc32_Arm64, LAST_NI_Crc32_Arm64 },
{ NI_Illegal, NI_Illegal }, // Dp_Arm64
{ FIRST_NI_Rdm_Arm64, LAST_NI_Rdm_Arm64 },
{ NI_Illegal, NI_Illegal }, // Sha1_Arm64
{ NI_Illegal, NI_Illegal }, // Sha256_Arm64
{ NI_Illegal, NI_Illegal }, // Sve_Arm64
#else
#error Unsupported platform
#endif
// clang-format on
};

#if defined(DEBUG)
static void ValidateHWIntrinsicInfo(CORINFO_InstructionSet isa, NamedIntrinsic ni, const HWIntrinsicInfo& info)
{
// We should have found the entry we expected to find here
assert(info.id == ni);

// It should belong to the expected ISA
assert(info.isa == isa);

if ((info.simdSize != -1) && (info.simdSize != 0))
{
// We should only have known SIMD sizes
#if defined(TARGET_ARM64)
assert((info.simdSize == 8) || (info.simdSize == 16));
#elif defined(TARGET_XARCH)
assert((info.simdSize == 16) || (info.simdSize == 32) || (info.simdSize == 64));
#else
unreached();
#endif
}

if (info.numArgs != -1)
{
// We should only have an expected number of arguments
#if defined(TARGET_ARM64)
assert((info.numArgs >= 0) && (info.numArgs <= 4));
#elif defined(TARGET_XARCH)
assert((info.numArgs >= 0) && (info.numArgs <= 5));
#else
unreached();
#endif
}

// TODO: There's more we could validate here in terms of flags, instructions used, etc.
// Some of this is already done ad-hoc elsewhere throughout the JIT
}

static void ValidateHWIntrinsicIsaRange(CORINFO_InstructionSet isa, const HWIntrinsicIsaRange& isaRange)
{
// Both entries should be illegal if either is
if (isaRange.FirstId == NI_Illegal)
{
assert(isaRange.LastId == NI_Illegal);
return;
}
assert(isaRange.LastId != NI_Illegal);

// Both entries should belong to the expected ISA
assert(HWIntrinsicInfo::lookupIsa(isaRange.FirstId) == isa);
assert(HWIntrinsicInfo::lookupIsa(isaRange.LastId) == isa);

// The last ID should be the same as or after the first ID
assert(isaRange.FirstId <= isaRange.LastId);

// The ID before the range should not be part of the expected ISA
NamedIntrinsic prevId = static_cast<NamedIntrinsic>(isaRange.FirstId - 1);
assert((prevId == NI_HW_INTRINSIC_START) || (HWIntrinsicInfo::lookupIsa(prevId) != isa));

// The ID after the range should not be part of the expected ISA
NamedIntrinsic nextId = static_cast<NamedIntrinsic>(isaRange.LastId + 1);
#if defined(TARGET_ARM64)
assert((nextId == NI_HW_INTRINSIC_END) || (HWIntrinsicInfo::lookupIsa(nextId) != isa) ||
(nextId == SPECIAL_NI_Sve));
#else
assert((nextId == NI_HW_INTRINSIC_END) || (HWIntrinsicInfo::lookupIsa(nextId) != isa));
#endif

NamedIntrinsic ni = static_cast<NamedIntrinsic>(isaRange.FirstId);
const HWIntrinsicInfo* prevInfo = &HWIntrinsicInfo::lookup(ni);
ValidateHWIntrinsicInfo(isa, ni, *prevInfo);

size_t count = (isaRange.LastId - isaRange.FirstId) + 1;

for (size_t i = 1; i < count; i++)
{
ni = static_cast<NamedIntrinsic>(isaRange.FirstId + i);
const HWIntrinsicInfo* info = &HWIntrinsicInfo::lookup(ni);
ValidateHWIntrinsicInfo(isa, ni, *info);

// The current name should be sorted after the previous
assert(strcmp(info->name, prevInfo->name) > 0);

prevInfo = info;
}
}

static void ValidateHWIntrinsicIsaRangeArray()
{
for (size_t i = 0; i < ARRAY_SIZE(hwintrinsicIsaRangeArray); i++)
{
CORINFO_InstructionSet isa = static_cast<CORINFO_InstructionSet>(i + 1);
ValidateHWIntrinsicIsaRange(isa, hwintrinsicIsaRangeArray[i]);
}
}
#endif

//------------------------------------------------------------------------
// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
//
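The binary search is only sound if every range is sorted, which the debug-only validation above asserts once per process (note the strcmp check between neighbors). A small distillation of that ordering check, again with invented types:

// sorted_check.cpp: invented Entry type and data, for illustration only.
#include <cstddef>
#include <cstring>

struct Entry
{
    const char* name;
};

// Distills the ordering assert in ValidateHWIntrinsicIsaRange: within
// [first, last], names must be strictly ascending (no duplicates), or the
// binary search in lookupId can miss entries.
static bool isSortedByName(const Entry* table, size_t first, size_t last)
{
    for (size_t i = first + 1; i <= last; i++)
    {
        if (strcmp(table[i].name, table[i - 1].name) <= 0)
        {
            return false;
        }
    }
    return true;
}

int main()
{
    const Entry good[] = {{"Add"}, {"Load"}, {"Store"}};
    const Entry bad[]  = {{"Load"}, {"Add"}};
    return (isSortedByName(good, 0, 2) && !isSortedByName(bad, 0, 1)) ? 0 : 1;
}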
@@ -487,7 +694,16 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
const char* methodName,
const char* enclosingClassName)
{
- // TODO-Throughput: replace sequential search by binary search
+ #if defined(DEBUG)
+ static bool validationCompleted = false;

+ if (!validationCompleted)
+ {
+ ValidateHWIntrinsicIsaRangeArray();
+ validationCompleted = true;
+ }
+ #endif // DEBUG

CORINFO_InstructionSet isa = lookupIsa(className, enclosingClassName);

if (isa == InstructionSet_ILLEGAL)
@@ -496,9 +712,22 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
}

bool isIsaSupported = comp->compSupportsHWIntrinsic(isa);
- bool isHardwareAcceleratedProp = (strcmp(methodName, "get_IsHardwareAccelerated") == 0);
+ bool isHardwareAcceleratedProp = false;
+ bool isSupportedProp = false;
uint32_t vectorByteLength = 0;

+ if (strncmp(methodName, "get_Is", 6) == 0)
+ {
+ if (strcmp(methodName + 6, "HardwareAccelerated") == 0)
+ {
+ isHardwareAcceleratedProp = true;
+ }
+ else if (strcmp(methodName + 6, "Supported") == 0)
+ {
+ isSupportedProp = true;
+ }
+ }

#ifdef TARGET_XARCH
if (isHardwareAcceleratedProp)
{
@@ -507,26 +736,29 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
// still can be cases where e.g. Sse41 might give an additional boost for Vector128, but it's
// not important enough to bump the minimal Sse version here)

if (strcmp(className, "Vector128") == 0)
if (isa == InstructionSet_Vector128)
{
isa = InstructionSet_SSE2;
vectorByteLength = 16;
}
else if (strcmp(className, "Vector256") == 0)
else if (isa == InstructionSet_Vector256)
{
isa = InstructionSet_AVX2;
vectorByteLength = 32;
}
else if (strcmp(className, "Vector512") == 0)
else if (isa == InstructionSet_Vector512)
{
isa = InstructionSet_AVX512F;
vectorByteLength = 64;
}
+ else
+ {
+ assert((strcmp(className, "Vector128") != 0) && (strcmp(className, "Vector256") != 0) &&
+ (strcmp(className, "Vector512") != 0));
+ }
}
#endif

- bool isSupportedProp = (strcmp(methodName, "get_IsSupported") == 0);

if (isSupportedProp && (strncmp(className, "Vector", 6) == 0))
{
// The Vector*<T>.IsSupported props report if T is supported & is specially handled in lookupNamedIntrinsic
@@ -621,33 +853,50 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
}
#endif

- for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++)
+ size_t isaIndex = static_cast<size_t>(isa) - 1;
+ assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray));

+ const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex];

+ if (isaRange.FirstId == NI_Illegal)
{
- const HWIntrinsicInfo& intrinsicInfo = hwIntrinsicInfoArray[i];
+ return NI_Illegal;
}

- if (isa != hwIntrinsicInfoArray[i].isa)
- {
- continue;
- }
+ size_t rangeLower = isaRange.FirstId;
+ size_t rangeUpper = isaRange.LastId;

+ while (rangeLower <= rangeUpper)
{
+ // This is safe since rangeLower and rangeUpper will never be negative
+ size_t rangeIndex = (rangeUpper + rangeLower) / 2;

- int numArgs = static_cast<unsigned>(intrinsicInfo.numArgs);
+ NamedIntrinsic ni = static_cast<NamedIntrinsic>(rangeIndex);
+ const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni);

- if ((numArgs != -1) && (sig->numArgs != static_cast<unsigned>(intrinsicInfo.numArgs)))
+ int sortOrder = strcmp(methodName, intrinsicInfo.name);

+ if (sortOrder < 0)
{
- continue;
+ rangeUpper = rangeIndex - 1;
}

- if (strcmp(methodName, intrinsicInfo.name) == 0)
+ else if (sortOrder > 0)
{
- NamedIntrinsic ni = intrinsicInfo.id;
+ rangeLower = rangeIndex + 1;
+ }
+ else
+ {
+ assert(sortOrder == 0);
+ assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast<uint8_t>(intrinsicInfo.numArgs)));

#if defined(TARGET_XARCH)
// on AVX1-only CPUs we only support a subset of intrinsics in Vector256
if (isLimitedVector256Isa && !AvxOnlyCompatible(ni))
{
return NI_Illegal;
}
- #endif
+ #endif // TARGET_XARCH

return ni;
}
}
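One note on the midpoint above: the in-code comment points out that rangeLower and rangeUpper are unsigned and hold small NamedIntrinsic ids, so (rangeUpper + rangeLower) cannot overflow here. For arbitrary bounds, the conventional overflow-proof form is lower + (upper - lower) / 2, sketched below for contrast:

// midpoint.cpp: assumption-free alternative midpoint, shown for contrast.
#include <cassert>
#include <cstddef>

static size_t midpoint(size_t lo, size_t hi)
{
    // equal to (lo + hi) / 2 whenever the sum fits; never overflows itself
    return lo + (hi - lo) / 2; // requires lo <= hi, as the search loop ensures
}

int main()
{
    assert(midpoint(4, 9) == 6);
    return 0;
}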
19 changes: 14 additions & 5 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -132,15 +132,24 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const c
{
assert(className != nullptr);

if (strcmp(className, "Arm64") == 0)
if (enclosingClassName == nullptr)
{
assert(enclosingClassName != nullptr);
return Arm64VersionOfIsa(lookupInstructionSet(enclosingClassName));
// No nested class is the most common, so fast path it
return lookupInstructionSet(className);
}
else

// Since lookupId is only called for the xplat intrinsics
// or intrinsics in the platform specific namespace, we assume
// that it will be one we can handle and don't try to early out.

CORINFO_InstructionSet enclosingIsa = lookupInstructionSet(enclosingClassName);

if (strcmp(className, "Arm64") == 0)
{
return lookupInstructionSet(className);
return Arm64VersionOfIsa(enclosingIsa);
}

return InstructionSet_ILLEGAL;
}

//------------------------------------------------------------------------
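The restructured lookupIsa fast-paths the common case of no nested class and only maps a nested "Arm64" class onto the 64-bit variant of its enclosing ISA, returning InstructionSet_ILLEGAL for anything else. A simplified sketch of that dispatch shape (the enum values and the single "AdvSimd" case are hypothetical stand-ins for the real tables):

// lookupisa_sketch.cpp: the dispatch shape only, with invented stand-ins.
#include <cstring>

enum InstructionSet
{
    ISA_ILLEGAL,
    ISA_AdvSimd,
    ISA_AdvSimd_Arm64
};

static InstructionSet lookupInstructionSet(const char* className)
{
    return (strcmp(className, "AdvSimd") == 0) ? ISA_AdvSimd : ISA_ILLEGAL;
}

static InstructionSet Arm64VersionOfIsa(InstructionSet isa)
{
    return (isa == ISA_AdvSimd) ? ISA_AdvSimd_Arm64 : ISA_ILLEGAL;
}

static InstructionSet lookupIsa(const char* className, const char* enclosingClassName)
{
    if (enclosingClassName == nullptr)
    {
        return lookupInstructionSet(className); // common case: no nested class
    }

    if (strcmp(className, "Arm64") == 0)
    {
        return Arm64VersionOfIsa(lookupInstructionSet(enclosingClassName));
    }

    return ISA_ILLEGAL; // unrecognized nested class
}

int main()
{
    return (lookupIsa("AdvSimd", nullptr) == ISA_AdvSimd &&
            lookupIsa("Arm64", "AdvSimd") == ISA_AdvSimd_Arm64)
               ? 0
               : 1;
}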
6 changes: 3 additions & 3 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -2087,11 +2087,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve_Compute32BitAddresses:
case NI_Sve_Compute64BitAddresses:
{
- static_assert_no_msg(AreContiguous(NI_Sve_Compute8BitAddresses, NI_Sve_Compute16BitAddresses,
- NI_Sve_Compute32BitAddresses, NI_Sve_Compute64BitAddresses));
+ static_assert_no_msg(AreContiguous(NI_Sve_Compute16BitAddresses, NI_Sve_Compute32BitAddresses,
+ NI_Sve_Compute64BitAddresses, NI_Sve_Compute8BitAddresses));

GetEmitter()->emitInsSve_R_R_R_I(ins, EA_SCALABLE, targetReg, op1Reg, op2Reg,
- (intrin.id - NI_Sve_Compute8BitAddresses), opt,
+ (intrin.id - NI_Sve_Compute16BitAddresses), opt,
INS_SCALABLE_OPTS_LSL_N);
break;
}
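The argument order in the static_assert changes because the intrinsic table is now sorted by name, and as strings "Compute16BitAddresses" < "Compute32BitAddresses" < "Compute64BitAddresses" < "Compute8BitAddresses", so Compute8BitAddresses now sits last in the contiguous group and the offset base becomes Compute16BitAddresses. A quick self-contained check of that ordering:

// name_order.cpp: verifies the string ordering the reordered assert relies on.
#include <cassert>
#include <cstring>

int main()
{
    // '1' < '3' < '6' < '8' as characters, so Compute8BitAddresses sorts last
    assert(strcmp("Compute16BitAddresses", "Compute32BitAddresses") < 0);
    assert(strcmp("Compute32BitAddresses", "Compute64BitAddresses") < 0);
    assert(strcmp("Compute64BitAddresses", "Compute8BitAddresses") < 0);
    return 0;
}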