From d10a78217f61a1113624eff3747a17cf2907297a Mon Sep 17 00:00:00 2001 From: Clinton Ingram Date: Tue, 28 Apr 2020 18:38:04 -0700 Subject: [PATCH] Add BSF and BSR fallbacks for BitOperations methods (#34550) * implement X86Base, add BSF and BSR intrinsics * add X86Base to Utf8String S.R.I. shim * add BSF and BSR fallbacks in BitOperations * add X86Base to zapinfo * disable 'w' size bit for BSF and BSR --- src/coreclr/src/inc/corinfo.h | 10 +- src/coreclr/src/inc/corinfoinstructionset.h | 122 +++++++++++------- .../src/inc/readytoruninstructionset.h | 1 + src/coreclr/src/jit/codegen.h | 1 + src/coreclr/src/jit/compiler.cpp | 9 +- src/coreclr/src/jit/emitxarch.cpp | 13 +- .../src/jit/hwintrinsiccodegenxarch.cpp | 38 ++++++ src/coreclr/src/jit/hwintrinsiclistxarch.h | 16 +++ src/coreclr/src/jit/hwintrinsicxarch.cpp | 10 ++ src/coreclr/src/jit/instrsxarch.h | 3 + .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 3 + .../JitInterface/CorInfoInstructionSet.cs | 122 +++++++++++------- .../ThunkGenerator/InstructionSetDesc.txt | 3 + .../crossgen2/jitinterface/jitwrapper.cpp | 10 +- src/coreclr/src/vm/codeman.cpp | 2 + src/coreclr/src/zap/zapinfo.cpp | 2 +- src/coreclr/src/zap/zapper.cpp | 1 + .../System.Private.CoreLib.Shared.projitems | 2 + .../src/System/Numerics/BitOperations.cs | 54 ++++++-- .../X86/X86Base.PlatformNotSupported.cs | 66 ++++++++++ .../System/Runtime/Intrinsics/X86/X86Base.cs | 67 ++++++++++ .../Runtime/Intrinsics/Intrinsics.Shims.cs | 12 ++ 23 files changed, 439 insertions(+), 129 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.cs diff --git a/src/coreclr/src/inc/corinfo.h b/src/coreclr/src/inc/corinfo.h index 9467045001daf..fb4489ceb87f8 100644 --- a/src/coreclr/src/inc/corinfo.h +++ b/src/coreclr/src/inc/corinfo.h @@ -217,11 
+217,11 @@ TODO: Talk about initializing strutures before use #endif #endif -SELECTANY const GUID JITEEVersionIdentifier = { /* bb6ea6c3-ce5a-4543-86b7-c9c88f9ec780 */ - 0xbb6ea6c3, - 0xce5a, - 0x4543, - { 0x86, 0xb7, 0xc9, 0xc8, 0x8f, 0x9e, 0xc7, 0x80 } +SELECTANY const GUID JITEEVersionIdentifier = { /* 8b2226a2-ac30-4f5c-ae5c-926c792ecdb9 */ + 0x8b2226a2, + 0xac30, + 0x4f5c, + { 0xae, 0x5c, 0x92, 0x6c, 0x79, 0x2e, 0xcd, 0xb9 } }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/src/inc/corinfoinstructionset.h b/src/coreclr/src/inc/corinfoinstructionset.h index e689f03d8557d..8040819439a00 100644 --- a/src/coreclr/src/inc/corinfoinstructionset.h +++ b/src/coreclr/src/inc/corinfoinstructionset.h @@ -32,58 +32,62 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=12, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 - InstructionSet_SSE=1, - InstructionSet_SSE2=2, - InstructionSet_SSE3=3, - InstructionSet_SSSE3=4, - InstructionSet_SSE41=5, - InstructionSet_SSE42=6, - InstructionSet_AVX=7, - InstructionSet_AVX2=8, - InstructionSet_AES=9, - InstructionSet_BMI1=10, - InstructionSet_BMI2=11, - InstructionSet_FMA=12, - InstructionSet_LZCNT=13, - InstructionSet_PCLMULQDQ=14, - InstructionSet_POPCNT=15, - InstructionSet_Vector128=16, - InstructionSet_Vector256=17, - InstructionSet_BMI1_X64=18, - InstructionSet_BMI2_X64=19, - InstructionSet_LZCNT_X64=20, - InstructionSet_POPCNT_X64=21, - InstructionSet_SSE_X64=22, - InstructionSet_SSE2_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, + InstructionSet_X86Base=1, + InstructionSet_SSE=2, + InstructionSet_SSE2=3, + InstructionSet_SSE3=4, + InstructionSet_SSSE3=5, + InstructionSet_SSE41=6, + InstructionSet_SSE42=7, + InstructionSet_AVX=8, + InstructionSet_AVX2=9, + InstructionSet_AES=10, + InstructionSet_BMI1=11, + InstructionSet_BMI2=12, + InstructionSet_FMA=13, + InstructionSet_LZCNT=14, + InstructionSet_PCLMULQDQ=15, + 
InstructionSet_POPCNT=16, + InstructionSet_Vector128=17, + InstructionSet_Vector256=18, + InstructionSet_X86Base_X64=19, + InstructionSet_BMI1_X64=20, + InstructionSet_BMI2_X64=21, + InstructionSet_LZCNT_X64=22, + InstructionSet_POPCNT_X64=23, + InstructionSet_SSE_X64=24, + InstructionSet_SSE2_X64=25, + InstructionSet_SSE41_X64=26, + InstructionSet_SSE42_X64=27, #endif // TARGET_AMD64 #ifdef TARGET_X86 - InstructionSet_SSE=1, - InstructionSet_SSE2=2, - InstructionSet_SSE3=3, - InstructionSet_SSSE3=4, - InstructionSet_SSE41=5, - InstructionSet_SSE42=6, - InstructionSet_AVX=7, - InstructionSet_AVX2=8, - InstructionSet_AES=9, - InstructionSet_BMI1=10, - InstructionSet_BMI2=11, - InstructionSet_FMA=12, - InstructionSet_LZCNT=13, - InstructionSet_PCLMULQDQ=14, - InstructionSet_POPCNT=15, - InstructionSet_Vector128=16, - InstructionSet_Vector256=17, - InstructionSet_BMI1_X64=18, - InstructionSet_BMI2_X64=19, - InstructionSet_LZCNT_X64=20, - InstructionSet_POPCNT_X64=21, - InstructionSet_SSE_X64=22, - InstructionSet_SSE2_X64=23, - InstructionSet_SSE41_X64=24, - InstructionSet_SSE42_X64=25, + InstructionSet_X86Base=1, + InstructionSet_SSE=2, + InstructionSet_SSE2=3, + InstructionSet_SSE3=4, + InstructionSet_SSSE3=5, + InstructionSet_SSE41=6, + InstructionSet_SSE42=7, + InstructionSet_AVX=8, + InstructionSet_AVX2=9, + InstructionSet_AES=10, + InstructionSet_BMI1=11, + InstructionSet_BMI2=12, + InstructionSet_FMA=13, + InstructionSet_LZCNT=14, + InstructionSet_PCLMULQDQ=15, + InstructionSet_POPCNT=16, + InstructionSet_Vector128=17, + InstructionSet_Vector256=18, + InstructionSet_X86Base_X64=19, + InstructionSet_BMI1_X64=20, + InstructionSet_BMI2_X64=21, + InstructionSet_LZCNT_X64=22, + InstructionSet_POPCNT_X64=23, + InstructionSet_SSE_X64=24, + InstructionSet_SSE2_X64=25, + InstructionSet_SSE41_X64=26, + InstructionSet_SSE42_X64=27, #endif // TARGET_X86 }; @@ -139,6 +143,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_Crc32_Arm64); #endif // 
TARGET_ARM64 #ifdef TARGET_AMD64 + if (HasInstructionSet(InstructionSet_X86Base)) + AddInstructionSet(InstructionSet_X86Base_X64); if (HasInstructionSet(InstructionSet_SSE)) AddInstructionSet(InstructionSet_SSE_X64); if (HasInstructionSet(InstructionSet_SSE2)) @@ -204,6 +210,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Sha256); #endif // TARGET_ARM64 #ifdef TARGET_AMD64 + if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64)) + resultflags.RemoveInstructionSet(InstructionSet_X86Base); + if (resultflags.HasInstructionSet(InstructionSet_X86Base_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_X86Base_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_SSE_X64)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -236,6 +246,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_POPCNT); if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT)) resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64); + if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_SSE2); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) @@ -264,6 +276,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins 
resultflags.RemoveInstructionSet(InstructionSet_POPCNT); #endif // TARGET_AMD64 #ifdef TARGET_X86 + if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) + resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_SSE2); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) @@ -332,6 +346,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector128"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 + case InstructionSet_X86Base : + return "X86Base"; + case InstructionSet_X86Base_X64 : + return "X86Base_X64"; case InstructionSet_SSE : return "SSE"; case InstructionSet_SSE_X64 : @@ -384,6 +402,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; #endif // TARGET_AMD64 #ifdef TARGET_X86 + case InstructionSet_X86Base : + return "X86Base"; case InstructionSet_SSE : return "SSE"; case InstructionSet_SSE2 : @@ -447,6 +467,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 + case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE; case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; @@ -464,6 +485,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; #endif // TARGET_AMD64 #ifdef TARGET_X86 + case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE; case 
READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; diff --git a/src/coreclr/src/inc/readytoruninstructionset.h b/src/coreclr/src/inc/readytoruninstructionset.h index 6e6f2549f9044..77f1cd267672e 100644 --- a/src/coreclr/src/inc/readytoruninstructionset.h +++ b/src/coreclr/src/inc/readytoruninstructionset.h @@ -32,6 +32,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Sha1=19, READYTORUN_INSTRUCTION_Sha256=20, READYTORUN_INSTRUCTION_Atomics=21, + READYTORUN_INSTRUCTION_X86Base=22, }; diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h index 9e22d9780121c..0b76f07284b13 100644 --- a/src/coreclr/src/jit/codegen.h +++ b/src/coreclr/src/jit/codegen.h @@ -1028,6 +1028,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genHWIntrinsic_R_R_R_RM( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3); void genBaseIntrinsic(GenTreeHWIntrinsic* node); + void genX86BaseIntrinsic(GenTreeHWIntrinsic* node); void genSSEIntrinsic(GenTreeHWIntrinsic* node); void genSSE2Intrinsic(GenTreeHWIntrinsic* node); void genSSE41Intrinsic(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index dd5530628883e..4779a6fa90c48 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -2161,7 +2161,7 @@ const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs) void Compiler::compSetProcessor() { // - // NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codemap.cpp + // NOTE: This function needs to be kept in sync with EEJitManager::SetCpuInfo() in vm\codeman.cpp // const JitFlags& jitFlags = *opts.jitFlags; @@ -2195,13 +2195,14 @@ void Compiler::compSetProcessor() #endif // TARGET_X86 + // The VM will set the ISA flags depending on actual hardware support. 
+ // We then select which ISAs to leave enabled based on the JIT config. + // The exception to this is the dummy Vector64/128/256 ISAs, which must be added explicitly. CORINFO_InstructionSetFlags instructionSetFlags = jitFlags.GetInstructionSetFlags(); opts.compSupportsISA = 0; opts.compSupportsISAReported = 0; #ifdef TARGET_XARCH - bool avxSupported = false; - if (JitConfig.EnableHWIntrinsic()) { // Dummy ISAs for simplifying the JIT code @@ -2315,8 +2316,6 @@ void Compiler::compSetProcessor() if (JitConfig.EnableHWIntrinsic()) { // Dummy ISAs for simplifying the JIT code - instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); - instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase_Arm64); instructionSetFlags.AddInstructionSet(InstructionSet_Vector64); instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); } diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 74371a8cb9779..269324d9a9d4f 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -9448,7 +9448,8 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Use the large version if this is not a byte. This trick will not // work in case of SSE2 and AVX instructions. - if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins)) + if ((size != EA_1BYTE) && (ins != INS_imul) && (ins != INS_bsf) && (ins != INS_bsr) && !IsSSEInstruction(ins) && + !IsAVXInstruction(ins)) { code++; } @@ -10214,8 +10215,9 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Use the large version if this is not a byte - if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) && - !IsAVXInstruction(ins)) + // TODO-XArch-Cleanup Can the need for the 'w' size bit be encoded in the instruction flags? 
+ if ((size != EA_1BYTE) && (ins != INS_imul) && (ins != INS_bsf) && (ins != INS_bsr) && (!insIsCMOV(ins)) && + !IsSSEInstruction(ins) && !IsAVXInstruction(ins)) { code |= 0x1; } @@ -11248,7 +11250,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) #endif // TARGET_AMD64 } #ifdef FEATURE_HW_INTRINSICS - else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt)) + else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || + (ins == INS_tzcnt)) { code = insEncodeRMreg(ins, code); if ((ins == INS_crc32) && (size > EA_1BYTE)) @@ -14826,6 +14829,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency += PERFSCORE_LATENCY_2C; break; + case INS_bsf: + case INS_bsr: case INS_pextrb: case INS_pextrd: case INS_pextrw: diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 240244ff20569..09c47655da3a2 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -359,6 +359,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_Vector256: genBaseIntrinsic(node); break; + case InstructionSet_X86Base: + case InstructionSet_X86Base_X64: + genX86BaseIntrinsic(node); + break; case InstructionSet_SSE: case InstructionSet_SSE_X64: genSSEIntrinsic(node); @@ -1249,6 +1253,40 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) genProduceReg(node); } +//------------------------------------------------------------------------ +// genX86BaseIntrinsic: Generates the code for an X86 base hardware intrinsic node +// +// Arguments: +// node - The hardware intrinsic node +// +void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + + switch (intrinsicId) + { + case NI_X86Base_BitScanForward: + case NI_X86Base_BitScanReverse: + case 
NI_X86Base_X64_BitScanForward: + case NI_X86Base_X64_BitScanReverse: + { + GenTree* op1 = node->gtGetOp1(); + regNumber targetReg = node->GetRegNum(); + var_types targetType = node->TypeGet(); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); + + genConsumeOperands(node); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1); + genProduceReg(node); + break; + } + + default: + unreached(); + break; + } +} + //------------------------------------------------------------------------ // genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node // diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index c2b1aee82c635..79bce97726918 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -78,6 +78,22 @@ HARDWARE_INTRINSIC(Vector256, WithElement, HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// 
{TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// X86Base Intrinsics +HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// X86Base 64-bit-only Intrinsics +HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 286a07937ca0e..1bb0ddab84a5b 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -19,6 +19,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) { + case InstructionSet_X86Base: + return InstructionSet_X86Base_X64; case 
InstructionSet_SSE: return InstructionSet_SSE_X64; case InstructionSet_SSE2: @@ -134,6 +136,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { return InstructionSet_LZCNT; } + else if (strcmp(className, "X86Base") == 0) + { + return InstructionSet_X86Base; + } return InstructionSet_ILLEGAL; } @@ -374,6 +380,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_SSE42_X64: case InstructionSet_Vector128: case InstructionSet_Vector256: + case InstructionSet_X86Base: + case InstructionSet_X86Base_X64: { return true; } @@ -405,6 +413,8 @@ bool HWIntrinsicInfo::isScalarIsa(CORINFO_InstructionSet isa) case InstructionSet_LZCNT_X64: case InstructionSet_POPCNT: case InstructionSet_POPCNT_X64: + case InstructionSet_X86Base: + case InstructionSet_X86Base_X64: { return true; } diff --git a/src/coreclr/src/jit/instrsxarch.h b/src/coreclr/src/jit/instrsxarch.h index 986ce9ab450c1..cbb56aa04c434 100644 --- a/src/coreclr/src/jit/instrsxarch.h +++ b/src/coreclr/src/jit/instrsxarch.h @@ -87,6 +87,9 @@ INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, // and the registers need to be reversed to get the correct encoding. 
INST3(bt, "bt", IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags) +INST3(bsf, "bsf", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BC, INS_FLAGS_WritesFlags) +INST3(bsr, "bsr", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BD, INS_FLAGS_WritesFlags) + INST3(movsx, "movsx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None) #ifdef TARGET_AMD64 INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, 0x4800000063, INS_FLAGS_None) diff --git a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 0db969fdc9701..632b7a9d2df5a 100644 --- a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -35,6 +35,7 @@ public enum ReadyToRunInstructionSet Sha1=19, Sha256=20, Atomics=21, + X86Base=22, } } diff --git a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 97fe00b465bc8..16cf47f43d686 100644 --- a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -46,6 +46,8 @@ public static class ReadyToRunInstructionSetHelper { switch (instructionSet) { + case InstructionSet.X64_X86Base: return ReadyToRunInstructionSet.X86Base; + case InstructionSet.X64_X86Base_X64: return ReadyToRunInstructionSet.X86Base; case InstructionSet.X64_SSE: return ReadyToRunInstructionSet.Sse; case InstructionSet.X64_SSE_X64: return ReadyToRunInstructionSet.Sse; case InstructionSet.X64_SSE2: return ReadyToRunInstructionSet.Sse2; @@ -80,6 +82,7 @@ public static class ReadyToRunInstructionSetHelper { switch (instructionSet) { + case InstructionSet.X86_X86Base: return ReadyToRunInstructionSet.X86Base; case InstructionSet.X86_SSE: return ReadyToRunInstructionSet.Sse; case 
InstructionSet.X86_SSE2: return ReadyToRunInstructionSet.Sse2; case InstructionSet.X86_SSE3: return ReadyToRunInstructionSet.Sse3; diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoInstructionSet.cs index 256fd8db05368..51638ce5dae76 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -31,56 +31,60 @@ public enum InstructionSet ARM64_Atomics=10, ARM64_Vector64=11, ARM64_Vector128=12, - X64_SSE=1, - X64_SSE2=2, - X64_SSE3=3, - X64_SSSE3=4, - X64_SSE41=5, - X64_SSE42=6, - X64_AVX=7, - X64_AVX2=8, - X64_AES=9, - X64_BMI1=10, - X64_BMI2=11, - X64_FMA=12, - X64_LZCNT=13, - X64_PCLMULQDQ=14, - X64_POPCNT=15, - X64_Vector128=16, - X64_Vector256=17, - X64_BMI1_X64=18, - X64_BMI2_X64=19, - X64_LZCNT_X64=20, - X64_POPCNT_X64=21, - X64_SSE_X64=22, - X64_SSE2_X64=23, - X64_SSE41_X64=24, - X64_SSE42_X64=25, - X86_SSE=1, - X86_SSE2=2, - X86_SSE3=3, - X86_SSSE3=4, - X86_SSE41=5, - X86_SSE42=6, - X86_AVX=7, - X86_AVX2=8, - X86_AES=9, - X86_BMI1=10, - X86_BMI2=11, - X86_FMA=12, - X86_LZCNT=13, - X86_PCLMULQDQ=14, - X86_POPCNT=15, - X86_Vector128=16, - X86_Vector256=17, - X86_BMI1_X64=18, - X86_BMI2_X64=19, - X86_LZCNT_X64=20, - X86_POPCNT_X64=21, - X86_SSE_X64=22, - X86_SSE2_X64=23, - X86_SSE41_X64=24, - X86_SSE42_X64=25, + X64_X86Base=1, + X64_SSE=2, + X64_SSE2=3, + X64_SSE3=4, + X64_SSSE3=5, + X64_SSE41=6, + X64_SSE42=7, + X64_AVX=8, + X64_AVX2=9, + X64_AES=10, + X64_BMI1=11, + X64_BMI2=12, + X64_FMA=13, + X64_LZCNT=14, + X64_PCLMULQDQ=15, + X64_POPCNT=16, + X64_Vector128=17, + X64_Vector256=18, + X64_X86Base_X64=19, + X64_BMI1_X64=20, + X64_BMI2_X64=21, + X64_LZCNT_X64=22, + X64_POPCNT_X64=23, + X64_SSE_X64=24, + X64_SSE2_X64=25, + X64_SSE41_X64=26, + X64_SSE42_X64=27, + X86_X86Base=1, + X86_SSE=2, + X86_SSE2=3, + X86_SSE3=4, + X86_SSSE3=5, + X86_SSE41=6, + X86_SSE42=7, + X86_AVX=8, + X86_AVX2=9, 
+ X86_AES=10, + X86_BMI1=11, + X86_BMI2=12, + X86_FMA=13, + X86_LZCNT=14, + X86_PCLMULQDQ=15, + X86_POPCNT=16, + X86_Vector128=17, + X86_Vector256=18, + X86_X86Base_X64=19, + X86_BMI1_X64=20, + X86_BMI2_X64=21, + X86_LZCNT_X64=22, + X86_POPCNT_X64=23, + X86_SSE_X64=24, + X86_SSE2_X64=25, + X86_SSE41_X64=26, + X86_SSE42_X64=27, } @@ -186,6 +190,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target break; case TargetArchitecture.X64: + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) + resultflags.AddInstructionSet(InstructionSet.X64_X86Base_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_SSE_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64)) @@ -218,6 +226,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) @@ -247,6 +257,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target break; case TargetArchitecture.X86: + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE3)) @@ -315,6 +327,8 @@ private static InstructionSetFlags 
ExpandInstructionSetByReverseImplicationHelpe break; case TargetArchitecture.X64: + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2_X64)) @@ -331,6 +345,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_LZCNT); if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64)) resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -360,6 +376,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe break; case TargetArchitecture.X86: + if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) resultflags.AddInstructionSet(InstructionSet.X86_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) @@ -427,6 +445,7 @@ public static IEnumerable ArchitectureToValidInstructionSets break; case TargetArchitecture.X64: + yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X64_X86Base, true); yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X64_SSE, true); yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X64_SSE2, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X64_SSE3, true); @@ -447,6 +466,7 @@ public static IEnumerable ArchitectureToValidInstructionSets break; case TargetArchitecture.X86: + yield return 
new InstructionSetInfo("base", "X86Base", InstructionSet.X86_X86Base, true); yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X86_SSE, true); yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X86_SSE2, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X86_SSE3, true); @@ -484,6 +504,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) break; case TargetArchitecture.X64: + if (HasInstructionSet(InstructionSet.X64_X86Base)) + AddInstructionSet(InstructionSet.X64_X86Base_X64); if (HasInstructionSet(InstructionSet.X64_SSE)) AddInstructionSet(InstructionSet.X64_SSE_X64); if (HasInstructionSet(InstructionSet.X64_SSE2)) diff --git a/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 0fcfc9b172e2e..479aea6562cef 100644 --- a/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -18,7 +18,9 @@ ; Definition of X86 instruction sets definearch ,X86 ,32Bit ,X64 +instructionset ,X86 ,X86Base , ,22 ,X86Base ,base instructionset ,X86 ,Sse , ,1 ,SSE ,sse +implication ,X86 ,SSE ,X86Base instructionset ,X86 ,Sse2 , ,2 ,SSE2 ,sse2 implication ,X86 ,SSE2 ,SSE instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 @@ -51,6 +53,7 @@ instructionset ,X86 , , , ,Vector256, ; Definition of X64 instruction sets (Define ) definearch ,X64 ,64Bit ,X64 +instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,BMI1 instructionset64bit,X86 ,BMI2 instructionset64bit,X86 ,LZCNT diff --git a/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp b/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp index 85942a9551a14..63fa69eda34f4 100644 --- a/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp +++ b/src/coreclr/src/tools/crossgen2/jitinterface/jitwrapper.cpp @@ -27,11 +27,11 @@ 
class CORJIT_FLAGS uint64_t corJitFlags; }; -static const GUID JITEEVersionIdentifier = { /* bb6ea6c3-ce5a-4543-86b7-c9c88f9ec780 */ - 0xbb6ea6c3, - 0xce5a, - 0x4543, - { 0x86, 0xb7, 0xc9, 0xc8, 0x8f, 0x9e, 0xc7, 0x80 } +static const GUID JITEEVersionIdentifier = { /* 8b2226a2-ac30-4f5c-ae5c-926c792ecdb9 */ + 0x8b2226a2, + 0xac30, + 0x4f5c, + { 0xae, 0x5c, 0x92, 0x6c, 0x79, 0x2e, 0xcd, 0xb9 } }; class Jit diff --git a/src/coreclr/src/vm/codeman.cpp b/src/coreclr/src/vm/codeman.cpp index b13ccd1964c9f..a41e19c02b5e8 100644 --- a/src/coreclr/src/vm/codeman.cpp +++ b/src/coreclr/src/vm/codeman.cpp @@ -1299,6 +1299,8 @@ void EEJitManager::SetCpuInfo() #endif // TARGET_X86 #if defined(TARGET_X86) || defined(TARGET_AMD64) + CPUCompileFlags.Set(InstructionSet_X86Base); + // NOTE: The below checks are based on the information reported by // Intel® 64 and IA-32 Architectures Software Developer’s Manual. Volume 2 // and diff --git a/src/coreclr/src/zap/zapinfo.cpp b/src/coreclr/src/zap/zapinfo.cpp index d16f4a1d5ca0e..5c88c0846b439 100644 --- a/src/coreclr/src/zap/zapinfo.cpp +++ b/src/coreclr/src/zap/zapinfo.cpp @@ -2161,7 +2161,7 @@ DWORD FilterNamedIntrinsicMethodAttribs(ZapInfo* pZapInfo, DWORD attribs, CORINF bool fIsPlatformSubArchitecture = false; #if defined(TARGET_X86) || defined(TARGET_AMD64) - fIsPlatformRequiredISA = (strcmp(isaName, "Sse") == 0) || (strcmp(isaName, "Sse2") == 0); + fIsPlatformRequiredISA = (strcmp(isaName, "X86Base") == 0) || (strcmp(isaName, "Sse") == 0) || (strcmp(isaName, "Sse2") == 0); fIsPlatformSubArchitecture = strcmp(className, "X64") == 0; #elif defined(TARGET_ARM64) fIsPlatformRequiredISA = (strcmp(isaName, "ArmBase") == 0) || (strcmp(isaName, "AdvSimd") == 0); diff --git a/src/coreclr/src/zap/zapper.cpp b/src/coreclr/src/zap/zapper.cpp index 41324badf949e..e7e22b4bd6698 100644 --- a/src/coreclr/src/zap/zapper.cpp +++ b/src/coreclr/src/zap/zapper.cpp @@ -1184,6 +1184,7 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * 
pVersionInfo) #if defined(TARGET_X86) || defined(TARGET_AMD64) // .NET Core requires SSE2. + m_pOpt->m_compilerFlags.Set(InstructionSet_X86Base); m_pOpt->m_compilerFlags.Set(InstructionSet_SSE); m_pOpt->m_compilerFlags.Set(InstructionSet_SSE2); #endif diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 23bb6cb96aefa..8ae9ebc7263df 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -1784,6 +1784,7 @@ + @@ -1801,6 +1802,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 9844240e1058a..c8356a0c9eb01 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -67,13 +67,21 @@ public static int LeadingZeroCount(uint value) return ArmBase.LeadingZeroCount(value); } - // Unguarded fallback contract is 0->31 + // Unguarded fallback contract is 0->31, BSR contract is 0->undefined if (value == 0) { return 32; } - return 31 - Log2SoftwareFallback(value); + if (X86Base.IsSupported) + { + // LZCNT returns index starting from MSB, whereas BSR gives the index from LSB. + // 31 ^ BSR here is equivalent to 31 - BSR since the BSR result is always between 0 and 31. + // This saves an instruction, as subtraction from constant requires either MOV/SUB or NEG/ADD. + return 31 ^ (int)X86Base.BitScanReverse(value); + } + + return 31 ^ Log2SoftwareFallback(value); } /// @@ -96,6 +104,12 @@ public static int LeadingZeroCount(ulong value) return ArmBase.Arm64.LeadingZeroCount(value); } + if (X86Base.X64.IsSupported) + { + // BSR contract is 0->undefined + return value == 0 ? 
64 : 63 ^ (int)X86Base.X64.BitScanReverse(value); + } + uint hi = (uint)(value >> 32); if (hi == 0) @@ -130,13 +144,19 @@ public static int Log2(uint value) // 1000.. 0 31-0 31 if (Lzcnt.IsSupported) { - // LZCNT contract is 0->32 - return 31 - (int)Lzcnt.LeadingZeroCount(value); + return 31 ^ (int)Lzcnt.LeadingZeroCount(value); } if (ArmBase.IsSupported) { - return 31 - ArmBase.LeadingZeroCount(value); + return 31 ^ ArmBase.LeadingZeroCount(value); + } + + // BSR returns the answer we're looking for directly. + // However BSR is much slower than LZCNT on AMD processors, so we leave it as a fallback only. + if (X86Base.IsSupported) + { + return (int)X86Base.BitScanReverse(value); } // Fallback contract is 0->0 @@ -160,13 +180,17 @@ public static int Log2(ulong value) if (Lzcnt.X64.IsSupported) { - // LZCNT contract is 0->64 - return 63 - (int)Lzcnt.X64.LeadingZeroCount(value); + return 63 ^ (int)Lzcnt.X64.LeadingZeroCount(value); } if (ArmBase.Arm64.IsSupported) { - return 63 - ArmBase.Arm64.LeadingZeroCount(value); + return 63 ^ ArmBase.Arm64.LeadingZeroCount(value); + } + + if (X86Base.X64.IsSupported) + { + return (int)X86Base.X64.BitScanReverse(value); } uint hi = (uint)(value >> 32); @@ -301,12 +325,17 @@ public static int TrailingZeroCount(uint value) return ArmBase.LeadingZeroCount(ArmBase.ReverseElementBits(value)); } - // Unguarded fallback contract is 0->0 + // Unguarded fallback contract is 0->0, BSF contract is 0->undefined if (value == 0) { return 32; } + if (X86Base.IsSupported) + { + return (int)X86Base.BitScanForward(value); + } + // uint.MaxValue >> 27 is always in range [0 - 31] so we use Unsafe.AddByteOffset to avoid bounds check return Unsafe.AddByteOffset( // Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u @@ -343,6 +372,13 @@ public static int TrailingZeroCount(ulong value) { return ArmBase.Arm64.LeadingZeroCount(ArmBase.Arm64.ReverseElementBits(value)); } + + if (X86Base.X64.IsSupported) + { + // BSF 
contract is 0->undefined + return value == 0 ? 64 : (int)X86Base.X64.BitScanForward(value); + } + uint lo = (uint)value; if (lo == 0) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.PlatformNotSupported.cs new file mode 100644 index 0000000000000..39ba429010cf9 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.PlatformNotSupported.cs @@ -0,0 +1,66 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// + /// This class provides access to the x86 base hardware instructions via intrinsics + /// + internal static class X86Base + { + public static bool IsSupported { [Intrinsic] get => false; } + + internal static class X64 + { + public static bool IsSupported { [Intrinsic] get => false; } + + /// + /// unsigned char _BitScanForward64 (unsigned __int32* index, unsigned __int64 a) + /// BSF reg reg/m64 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static ulong BitScanForward(ulong value) { throw new PlatformNotSupportedException(); } + + /// + /// unsigned char _BitScanReverse64 (unsigned __int32* index, unsigned __int64 a) + /// BSR reg reg/m64 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class.
+ /// + internal static ulong BitScanReverse(ulong value) { throw new PlatformNotSupportedException(); } + } + + /// + /// unsigned char _BitScanForward (unsigned __int32* index, unsigned __int32 a) + /// BSF reg reg/m32 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static uint BitScanForward(uint value) { throw new PlatformNotSupportedException(); } + + /// + /// unsigned char _BitScanReverse (unsigned __int32* index, unsigned __int32 a) + /// BSR reg reg/m32 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static uint BitScanReverse(uint value) { throw new PlatformNotSupportedException(); } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.cs new file mode 100644 index 0000000000000..af2f5d3540add --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.cs @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information.
+ + using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// + /// This class provides access to the x86 base hardware instructions via intrinsics + /// + [Intrinsic] + internal static class X86Base + { + public static bool IsSupported { get => IsSupported; } + + [Intrinsic] + internal static class X64 + { + public static bool IsSupported { get => IsSupported; } + + /// + /// unsigned char _BitScanForward64 (unsigned __int32* index, unsigned __int64 a) + /// BSF reg reg/m64 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static ulong BitScanForward(ulong value) => BitScanForward(value); + + /// + /// unsigned char _BitScanReverse64 (unsigned __int32* index, unsigned __int64 a) + /// BSR reg reg/m64 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static ulong BitScanReverse(ulong value) => BitScanReverse(value); + } + + /// + /// unsigned char _BitScanForward (unsigned __int32* index, unsigned __int32 a) + /// BSF reg reg/m32 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class. + /// + internal static uint BitScanForward(uint value) => BitScanForward(value); + + /// + /// unsigned char _BitScanReverse (unsigned __int32* index, unsigned __int32 a) + /// BSR reg reg/m32 + /// The above native signature does not directly correspond to the managed signature. + /// + /// + /// This method is to remain internal. + /// Its functionality is exposed in the public <see cref="Numerics.BitOperations" /> class.
+ /// + internal static uint BitScanReverse(uint value) => BitScanReverse(value); + } +} diff --git a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs index 33c59a3f122db..91059ddbeb3a5 100644 --- a/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs +++ b/src/libraries/System.Utf8String.Experimental/src/System/Runtime/Intrinsics/Intrinsics.Shims.cs @@ -26,6 +26,18 @@ internal readonly struct Vector128 namespace System.Runtime.Intrinsics.X86 { + internal static class X86Base + { + internal static class X64 + { + public const bool IsSupported = false; + internal static ulong BitScanForward(ulong value) => throw new PlatformNotSupportedException(); + internal static ulong BitScanReverse(ulong value) => throw new PlatformNotSupportedException(); + } + public const bool IsSupported = false; + internal static uint BitScanForward(uint value) => throw new PlatformNotSupportedException(); + internal static uint BitScanReverse(uint value) => throw new PlatformNotSupportedException(); + } internal abstract class Bmi1 { public abstract class X64