From 66556c467d0b2419b5c2a261fdfa2703e6b3f6bc Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Sat, 22 Jul 2023 21:13:29 -0700 Subject: [PATCH] Unify hardware feature detection between CoreCLR JIT and AOT (#89342) Follow up on #87865 --- src/coreclr/CMakeLists.txt | 1 - src/coreclr/classlibnative/bcltype/system.cpp | 4 +- src/coreclr/gc/vxsort/isa_detection.cpp | 24 +- src/coreclr/minipal/Unix/CMakeLists.txt | 17 +- src/coreclr/minipal/Windows/CMakeLists.txt | 14 +- .../nativeaot/Runtime/amd64/MemClrForGC.asm | 99 --- .../nativeaot/Runtime/i386/MemClrForGC.asm | 148 ----- src/coreclr/pal/inc/pal.h | 54 -- src/coreclr/pal/src/CMakeLists.txt | 2 - src/coreclr/pal/src/arch/amd64/processor.cpp | 79 --- src/coreclr/pal/src/arch/arm/processor.cpp | 21 - src/coreclr/pal/src/arch/arm64/processor.cpp | 21 - src/coreclr/pal/src/arch/i386/processor.cpp | 21 - .../pal/src/arch/loongarch64/processor.cpp | 21 - .../pal/src/arch/ppc64le/processor.cpp | 21 - .../pal/src/arch/riscv64/processor.cpp | 21 - src/coreclr/pal/src/arch/s390x/processor.cpp | 22 - src/coreclr/pal/src/include/pal/context.h | 2 + src/coreclr/pal/src/misc/jitsupport.cpp | 302 --------- src/coreclr/vm/amd64/AsmHelpers.asm | 32 - src/coreclr/vm/amd64/unixstubs.cpp | 43 -- src/coreclr/vm/codeman.cpp | 592 +++++------------- src/coreclr/vm/i386/jitinterfacex86.cpp | 2 + src/native/minipal/cpufeatures.c | 64 +- src/native/minipal/cpuid.h | 12 +- 25 files changed, 192 insertions(+), 1447 deletions(-) delete mode 100644 src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm delete mode 100644 src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm delete mode 100644 src/coreclr/pal/src/arch/amd64/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/arm/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/arm64/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/i386/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/loongarch64/processor.cpp delete mode 100644 
src/coreclr/pal/src/arch/ppc64le/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/riscv64/processor.cpp delete mode 100644 src/coreclr/pal/src/arch/s390x/processor.cpp delete mode 100644 src/coreclr/pal/src/misc/jitsupport.cpp diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index d21c158fee2be..925fc6d447652 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -191,7 +191,6 @@ include_directories("debug/inc/${ARCH_SOURCES_DIR}") include_directories("debug/inc/dump") include_directories("md/inc") include_directories("classlibnative/bcltype") -include_directories("classlibnative/cryptography") include_directories("classlibnative/inc") include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") diff --git a/src/coreclr/classlibnative/bcltype/system.cpp b/src/coreclr/classlibnative/bcltype/system.cpp index 325afdc9cf379..ef02743b36696 100644 --- a/src/coreclr/classlibnative/bcltype/system.cpp +++ b/src/coreclr/classlibnative/bcltype/system.cpp @@ -30,7 +30,7 @@ #include "array.h" #include "eepolicy.h" - +#include FCIMPL0(UINT32, SystemNative::GetTickCount) @@ -50,8 +50,6 @@ FCIMPL0(UINT64, SystemNative::GetTickCount64) FCIMPLEND; - - extern "C" VOID QCALLTYPE Environment_Exit(INT32 exitcode) { QCALL_CONTRACT; diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index 1dcb7913a8696..93c7288663c42 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -19,28 +19,6 @@ enum class SupportedISA #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) -static DWORD64 GetEnabledXStateFeaturesHelper() -{ - // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX is supported - typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = LoadLibraryExW(L"kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); - if (hMod == NULL) - return 
0; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return 0; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - - return FeatureMask; -} - SupportedISA DetermineSupportedISA() { // register definitions to make the following code more readable @@ -78,7 +56,7 @@ SupportedISA DetermineSupportedISA() DWORD64 xcr0 = _xgetbv(0); // get OS XState info - DWORD64 FeatureMask = GetEnabledXStateFeaturesHelper(); + DWORD64 FeatureMask = GetEnabledXStateFeatures(); // get processor extended feature flag info __cpuidex(reg, 7, 0); diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt index 0c09c76d9583f..ca41eb4e2bb0d 100644 --- a/src/coreclr/minipal/Unix/CMakeLists.txt +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -1,5 +1,18 @@ -add_library(coreclrminipal - STATIC +set(SOURCES doublemapping.cpp dn-u16.cpp ) + +if(NOT CLR_CROSS_COMPONENTS_BUILD) + list(APPEND SOURCES + ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c + ) +endif() + +add_library(coreclrminipal + STATIC + ${SOURCES} +) + +include(${CLR_SRC_NATIVE_DIR}/minipal/configure.cmake) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/coreclr/minipal/Windows/CMakeLists.txt b/src/coreclr/minipal/Windows/CMakeLists.txt index 0c09c76d9583f..0c83eeade09d1 100644 --- a/src/coreclr/minipal/Windows/CMakeLists.txt +++ b/src/coreclr/minipal/Windows/CMakeLists.txt @@ -1,5 +1,15 @@ -add_library(coreclrminipal - STATIC +set(SOURCES doublemapping.cpp dn-u16.cpp ) + +if(NOT CLR_CROSS_COMPONENTS_BUILD) + list(APPEND SOURCES + ${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c + ) +endif() + +add_library(coreclrminipal + STATIC + ${SOURCES} +) diff --git a/src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm b/src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm deleted file mode 100644 index de5476cad4555..0000000000000 --- 
a/src/coreclr/nativeaot/Runtime/amd64/MemClrForGC.asm +++ /dev/null @@ -1,99 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - -include AsmMacros.inc - - -LEAF_ENTRY memclr_for_gc, _TEXT - -; x64 version - -; we get the following parameters -; rcx = destination address -; rdx = size to clear - - ; save rdi - this should be faster than a push - mov r11,rdi - - xor eax, eax - - ; check alignment of destination - test cl,7 - jnz alignDest -alignDone: - ; now destination is qword aligned - ; move it to rdi for rep stos - mov rdi,rcx - - ; compute number of bytes to clear non-temporally - ; we wish to clear the first 8k or so with rep stos, - ; anything above that non-temporally - - xor r8,r8 - cmp rdx,8*1024 - jbe noNonTempClear - - ; compute the number of bytes above 8k - ; and round down to a multiple of 64 - mov r8,rdx - sub r8,8*1024 - and r8,not 63 - - ; compute remaining size to clear temporally - sub rdx,r8 - -noNonTempClear: - - ; do the temporal clear - mov rcx,rdx - shr rcx,3 - rep stosq - - ; do the non-temporal clear - test r8,r8 - jne nonTempClearLoop - -nonTempClearDone: - - ; clear any remaining bytes - mov rcx,rdx - and rcx,7 - rep stosb - - ; restore rdi - mov rdi,r11 - - ret - - ; this is the infrequent case, hence out of line -nonTempClearLoop: - movnti [rdi+ 0],rax - movnti [rdi+ 8],rax - movnti [rdi+16],rax - movnti [rdi+24],rax - - movnti [rdi+32],rax - movnti [rdi+40],rax - movnti [rdi+48],rax - movnti [rdi+56],rax - - add rdi,64 - sub r8,64 - ja nonTempClearLoop - jmp nonTempClearDone - -alignDest: - test rdx,rdx - je alignDone -alignLoop: - mov [rcx],al - add rcx,1 - sub rdx,1 - jz alignDone - test cl,7 - jnz alignLoop - jmp alignDone - -LEAF_END memclr_for_gc, _TEXT - - end diff --git a/src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm b/src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm deleted file mode 100644 index 
d093e0adce0bb..0000000000000 --- a/src/coreclr/nativeaot/Runtime/i386/MemClrForGC.asm +++ /dev/null @@ -1,148 +0,0 @@ -;; Licensed to the .NET Foundation under one or more agreements. -;; The .NET Foundation licenses this file to you under the MIT license. - - .586 - .xmm - .model flat - option casemap:none - - -EXTERN _IsProcessorFeaturePresent@4 : PROC - -PF_XMMI64_INSTRUCTIONS_AVAILABLE equ 10 - - .data -canUseSSE2 db 0 - - .code - -_memclr_for_gc@8 proc public - -; x86 version - -; we get the following parameters -; ecx = destination address -; edx = size to clear - - push ebx - push edi - - xor eax, eax - - ; load destination - mov edi,[esp+8+4] - - ; load size - mov ebx,[esp+8+8] - - ; check alignment of destination - test edi,3 - jnz alignDest -alignDone: - ; now destination is dword aligned - - ; compute number of bytes to clear non-temporally - ; we wish to clear the first 8k or so with rep stos, - ; anything above that non-temporally - - xor edx,edx - cmp ebx,8*1024 - jbe noNonTempClear - - ; can we use SSE2 instructions? 
- cmp canUseSSE2,0 - js noNonTempClear - jz computeCanUseSSE2 - -computeNonTempClear: - - ; compute the number of bytes above 8k - ; and round down to a multiple of 64 - mov edx,ebx - sub edx,8*1024 - and edx,not 63 - - ; compute remaining size to clear temporally - sub ebx,edx - -noNonTempClear: - ; do the temporal clear - mov ecx,ebx - shr ecx,2 - rep stosd - - ; do the non-temporal clear - test edx,edx - jne nonTempClearLoop - -nonTempClearDone: - - ; clear any remaining bytes - mov ecx,ebx - and ecx,3 - rep stosb - - pop edi - pop ebx - ret 8 - - ; this is the infrequent case, hence out of line -nonTempClearLoop: - movnti [edi+ 0],eax - movnti [edi+ 4],eax - movnti [edi+ 8],eax - movnti [edi+12],eax - - movnti [edi+16],eax - movnti [edi+20],eax - movnti [edi+24],eax - movnti [edi+28],eax - - movnti [edi+32],eax - movnti [edi+36],eax - movnti [edi+40],eax - movnti [edi+44],eax - - movnti [edi+48],eax - movnti [edi+52],eax - movnti [edi+56],eax - movnti [edi+60],eax - - add edi,64 - sub edx,64 - ja nonTempClearLoop - jmp nonTempClearDone - -alignDest: - test ebx,ebx - je alignDone -alignLoop: - mov [edi],al - add edi,1 - sub ebx,1 - jz alignDone - test edi,3 - jnz alignLoop - jmp alignDone - -computeCanUseSSE2: - ; we are not using the sse2 register set, - ; just sse2 instructions (movnti), - ; thus we just ask the OS about the usability of the instructions - ; OS bugs about saving/restoring registers like in early versions - ; of Vista etc. 
in the WoW shouldn't matter - - push PF_XMMI64_INSTRUCTIONS_AVAILABLE - call _IsProcessorFeaturePresent@4 - mov ecx,eax - xor eax,eax ; reset eax to 0 - test ecx,ecx - mov canUseSSE2,1 - jne computeNonTempClear - mov canUseSSE2,-1 - xor edx,edx - jmp noNonTempClear - -_memclr_for_gc@8 endp - - end diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 936b4100ec0e9..494e5625ff966 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1325,47 +1325,6 @@ QueueUserAPC( IN HANDLE hThread, IN ULONG_PTR dwData); -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - -#if defined(HOST_X86) || defined(HOST_AMD64) -// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures -// We define matching signatures for use on Unix platforms. -// -// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid - -#if __has_builtin(__cpuid) -extern "C" void __cpuid(int cpuInfo[4], int function_id); -#else -inline void __cpuid(int cpuInfo[4], int function_id) -{ - // Based on the Clang implementation provided in cpuid.h: - // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h - - __asm(" cpuid\n" \ - : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ - : "0"(function_id) - ); -} -#endif // __cpuid - -#if __has_builtin(__cpuidex) -extern "C" void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id); -#else -inline void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id) -{ - // Based on the Clang implementation provided in cpuid.h: - // https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/cpuid.h - - __asm(" cpuid\n" \ - : "=a"(cpuInfo[0]), "=b"(cpuInfo[1]), "=c"(cpuInfo[2]), "=d"(cpuInfo[3]) \ - : "0"(function_id), "2"(subFunction_id) - ); -} -#endif // __cpuidex -#endif // HOST_X86 || HOST_AMD64 - #ifdef HOST_X86 // @@ -4518,19 +4477,6 @@ void _mm_setcsr(unsigned int i); /******************* PAL functions for CPU 
capability detection *******/ -#ifdef __cplusplus - -#if defined(HOST_ARM64) && defined(TARGET_ARM64) -class CORJIT_FLAGS; - -PALIMPORT -VOID -PALAPI -PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags); -#endif // HOST_ARM64 && TARGET_ARM64 - -#endif - #ifdef __cplusplus PALIMPORT diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt index 804a71234031d..26f0fa0295dec 100644 --- a/src/coreclr/pal/src/CMakeLists.txt +++ b/src/coreclr/pal/src/CMakeLists.txt @@ -106,7 +106,6 @@ set(ARCH_SOURCES arch/${PAL_ARCH_SOURCES_DIR}/context2.S arch/${PAL_ARCH_SOURCES_DIR}/debugbreak.S arch/${PAL_ARCH_SOURCES_DIR}/exceptionhelper.S - arch/${PAL_ARCH_SOURCES_DIR}/processor.cpp ) if(NOT CLR_CMAKE_TARGET_OSX) @@ -162,7 +161,6 @@ set(SOURCES misc/error.cpp misc/errorstrings.cpp misc/fmtmessage.cpp - misc/jitsupport.cpp misc/miscpalapi.cpp misc/perfjitdump.cpp misc/strutil.cpp diff --git a/src/coreclr/pal/src/arch/amd64/processor.cpp b/src/coreclr/pal/src/arch/amd64/processor.cpp deleted file mode 100644 index 0fe9ff7c18344..0000000000000 --- a/src/coreclr/pal/src/arch/amd64/processor.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the Intel x86/x64 - platforms. These functions are processor dependent. 
- - - ---*/ - -#include "pal/palinternal.h" - -/*++ -Function: -XmmYmmStateSupport - -Check if OS has enabled both XMM and YMM state support - -Return value: -1 if XMM and YMM are enabled, 0 otherwise ---*/ -extern "C" unsigned int XmmYmmStateSupport() -{ - unsigned int eax; - __asm(" mov $1, %%eax\n" \ - " cpuid\n" \ - " xor %%eax, %%eax\n" \ - " and $0x18000000, %%ecx\n" /* check for xsave feature set and that it is enabled by the OS */ \ - " cmp $0x18000000, %%ecx\n" \ - " jne end\n" \ - " xor %%ecx, %%ecx\n" \ - " xgetbv\n" \ - "end:\n" \ - : "=a"(eax) /* output in eax */ \ - : /* no inputs */ \ - : "ebx", "ecx", "edx" /* registers that are clobbered */ - ); - // Check OS has enabled both XMM and YMM state support - return ((eax & 0x06) == 0x06) ? 1 : 0; -} - -/*++ -Function: -Avx512StateSupport - -Check if OS has enabled XMM, YMM and ZMM state support - -Return value: -1 if XMM, YMM and ZMM are enabled, 0 otherwise ---*/ -extern "C" unsigned int Avx512StateSupport() -{ - unsigned int eax; - __asm(" mov $1, %%eax\n" \ - " cpuid\n" \ - " xor %%eax, %%eax\n" \ - " and $0x18000000, %%ecx\n" /* check for xsave feature set and that it is enabled by the OS */ \ - " cmp $0x18000000, %%ecx\n" \ - " jne endz\n" \ - " xor %%ecx, %%ecx\n" \ - " xgetbv\n" \ - "endz:\n" \ - : "=a"(eax) /* output in eax */ \ - : /* no inputs */ \ - : "ebx", "ecx", "edx" /* registers that are clobbered */ - ); - // Check OS has enabled XMM, YMM and ZMM state support - return ((eax & 0x0E6) == 0x0E6) ? 1 : 0; -} diff --git a/src/coreclr/pal/src/arch/arm/processor.cpp b/src/coreclr/pal/src/arch/arm/processor.cpp deleted file mode 100644 index 7048a5b4b4db4..0000000000000 --- a/src/coreclr/pal/src/arch/arm/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the ARM - platform. These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/arm64/processor.cpp b/src/coreclr/pal/src/arch/arm64/processor.cpp deleted file mode 100644 index ab4b84febd91c..0000000000000 --- a/src/coreclr/pal/src/arch/arm64/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the ARM64 - platform. These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/i386/processor.cpp b/src/coreclr/pal/src/arch/i386/processor.cpp deleted file mode 100644 index 7f60b75cfe5ee..0000000000000 --- a/src/coreclr/pal/src/arch/i386/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the Intel x86/x64 - platforms. These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/loongarch64/processor.cpp b/src/coreclr/pal/src/arch/loongarch64/processor.cpp deleted file mode 100644 index c0cc5beb851ec..0000000000000 --- a/src/coreclr/pal/src/arch/loongarch64/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the LOONGARCH64 - platform. 
These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/ppc64le/processor.cpp b/src/coreclr/pal/src/arch/ppc64le/processor.cpp deleted file mode 100644 index 6680dbc532677..0000000000000 --- a/src/coreclr/pal/src/arch/ppc64le/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the IBM PowerPC (ppc64le) - platforms. These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/riscv64/processor.cpp b/src/coreclr/pal/src/arch/riscv64/processor.cpp deleted file mode 100644 index 4e8998679e46a..0000000000000 --- a/src/coreclr/pal/src/arch/riscv64/processor.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the RISCV64 - platform. These functions are processor dependent. - - - ---*/ - -#include "pal/palinternal.h" diff --git a/src/coreclr/pal/src/arch/s390x/processor.cpp b/src/coreclr/pal/src/arch/s390x/processor.cpp deleted file mode 100644 index d2d0fdd5e8f08..0000000000000 --- a/src/coreclr/pal/src/arch/s390x/processor.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*++ - - - -Module Name: - - processor.cpp - -Abstract: - - Implementation of processor related functions for the IBM s390x - platforms. These functions are processor dependent. 
- - - ---*/ - -#include "pal/palinternal.h" - diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 011860c904d36..88566730e961b 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -28,6 +28,8 @@ extern "C" #include #include +#include + /* A type to wrap the native context type, which is ucontext_t on some * platforms and another type elsewhere. */ #if HAVE_UCONTEXT_T diff --git a/src/coreclr/pal/src/misc/jitsupport.cpp b/src/coreclr/pal/src/misc/jitsupport.cpp deleted file mode 100644 index 3042629004351..0000000000000 --- a/src/coreclr/pal/src/misc/jitsupport.cpp +++ /dev/null @@ -1,302 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - - -#include "pal/palinternal.h" -#include "pal/dbgmsg.h" -SET_DEFAULT_DEBUG_CHANNEL(MISC); - -#include "../../../inc/corjitflags.h" - -#if HAVE_AUXV_HWCAP_H -#include -#include -#endif - -#if HAVE_SYSCTLBYNAME -#include -#endif - -#if defined(HOST_ARM64) && defined(__linux__) -struct CpuCapability -{ - const char* name; - unsigned long hwCapFlag; -}; - -static const CpuCapability CpuCapabilities[] = { - //{ "fp", HWCAP_FP }, -#ifdef HWCAP_ASIMD - { "asimd", HWCAP_ASIMD }, -#endif - //{ "evtstrm", HWCAP_EVTSTRM }, -#ifdef HWCAP_AES - { "aes", HWCAP_AES }, -#endif - //{ "pmull", HWCAP_PMULL }, -#ifdef HWCAP_SHA1 - { "sha1", HWCAP_SHA1 }, -#endif -#ifdef HWCAP_SHA2 - { "sha2", HWCAP_SHA2 }, -#endif -#ifdef HWCAP_CRC32 - { "crc32", HWCAP_CRC32 }, -#endif -#ifdef HWCAP_ATOMICS - { "atomics", HWCAP_ATOMICS }, -#endif - //{ "fphp", HWCAP_FPHP }, - //{ "asimdhp", HWCAP_ASIMDHP }, - //{ "cpuid", HWCAP_CPUID }, -#ifdef HWCAP_ASIMDRDM - { "asimdrdm", HWCAP_ASIMDRDM }, -#endif - //{ "jscvt", HWCAP_JSCVT }, - //{ "fcma", HWCAP_FCMA }, -#ifdef HWCAP_LRCPC - { "lrcpc", HWCAP_LRCPC }, -#endif - //{ "dcpop", HWCAP_DCPOP }, - //{ "sha3", HWCAP_SHA3 
}, - //{ "sm3", HWCAP_SM3 }, - //{ "sm4", HWCAP_SM4 }, -#ifdef HWCAP_ASIMDDP - { "asimddp", HWCAP_ASIMDDP }, -#endif - //{ "sha512", HWCAP_SHA512 }, - //{ "sve", HWCAP_SVE }, - //{ "asimdfhm", HWCAP_ASIMDFHM }, - //{ "dit", HWCAP_DIT }, - //{ "uscat", HWCAP_USCAT }, - //{ "ilrcpc", HWCAP_ILRCPC }, - //{ "flagm", HWCAP_FLAGM }, - //{ "ssbs", HWCAP_SSBS }, - //{ "sb", HWCAP_SB }, - //{ "paca", HWCAP_PACA }, - //{ "pacg", HWCAP_PACG }, - - // Ensure the array is never empty - { "", 0 } -}; - -// Returns the HWCAP_* flag corresponding to the given capability name. -// If the capability name is not recognized or unused at present, zero is returned. -static unsigned long LookupCpuCapabilityFlag(const char* start, size_t length) -{ - for (size_t i = 0; i < ARRAY_SIZE(CpuCapabilities); i++) - { - const char* capabilityName = CpuCapabilities[i].name; - if ((length == strlen(capabilityName)) && (memcmp(start, capabilityName, length) == 0)) - { - return CpuCapabilities[i].hwCapFlag; - } - } - return 0; -} - -// Reads the first Features entry from /proc/cpuinfo (assuming other entries are essentially -// identical) and translates it into a set of HWCAP_* flags. 
-static unsigned long GetCpuCapabilityFlagsFromCpuInfo() -{ - unsigned long capabilityFlags = 0; - FILE* cpuInfoFile = fopen("/proc/cpuinfo", "r"); - - if (cpuInfoFile != NULL) - { - char* line = nullptr; - size_t lineLen = 0; - - while (getline(&line, &lineLen, cpuInfoFile) != -1) - { - char* p = line; - while (isspace(*p)) p++; - - if (memcmp(p, "Features", 8) != 0) - continue; - - // Skip "Features" and look for ':' - p += 8; - - while (isspace(*p)) p++; - if (*p != ':') - continue; - - // Skip ':' and parse the list - p++; - - while (true) - { - while (isspace(*p)) p++; - if (*p == 0) - break; - - char* start = p++; - while ((*p != 0) && !isspace(*p)) p++; - - capabilityFlags |= LookupCpuCapabilityFlag(start, p - start); - } - - break; - } - - free(line); - fclose(cpuInfoFile); - } - - return capabilityFlags; -} -#endif // defined(HOST_ARM64) && defined(__linux__) - -#if defined(HOST_ARM64) && defined(TARGET_ARM64) -PALIMPORT -VOID -PALAPI -PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) -{ - _ASSERTE(flags); - -#if HAVE_AUXV_HWCAP_H - unsigned long hwCap = getauxval(AT_HWCAP); - -#if defined(__linux__) - // getauxval(AT_HWCAP) returns zero on WSL1 (https://github.com/microsoft/WSL/issues/3682), - // fall back to reading capabilities from /proc/cpuinfo. - if (hwCap == 0) - hwCap = GetCpuCapabilityFlagsFromCpuInfo(); -#endif - -// HWCAP_* flags are introduced by ARM into the Linux kernel as new extensions are published. -// For a given kernel, some of these flags may not be present yet. -// Use ifdef for each to allow for compilation with any vintage kernel. -// From a single binary distribution perspective, compiling with latest kernel asm/hwcap.h should -// include all published flags. Given flags are merged to kernel and published before silicon is -// available, using the latest kernel for release should be sufficient. 
- flags->Set(InstructionSet_ArmBase); -#ifdef HWCAP_AES - if (hwCap & HWCAP_AES) - flags->Set(InstructionSet_Aes); -#endif -#ifdef HWCAP_ATOMICS - if (hwCap & HWCAP_ATOMICS) - flags->Set(InstructionSet_Atomics); -#endif -#ifdef HWCAP_CRC32 - if (hwCap & HWCAP_CRC32) - flags->Set(InstructionSet_Crc32); -#endif -#ifdef HWCAP_DCPOP -// if (hwCap & HWCAP_DCPOP) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_DCPOP); -#endif -#ifdef HWCAP_ASIMDDP - if (hwCap & HWCAP_ASIMDDP) - flags->Set(InstructionSet_Dp); -#endif -#ifdef HWCAP_FCMA -// if (hwCap & HWCAP_FCMA) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FCMA); -#endif -#ifdef HWCAP_FP -// if (hwCap & HWCAP_FP) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP); -#endif -#ifdef HWCAP_FPHP -// if (hwCap & HWCAP_FPHP) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP16); -#endif -#ifdef HWCAP_JSCVT -// if (hwCap & HWCAP_JSCVT) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_JSCVT); -#endif -#ifdef HWCAP_LRCPC - if (hwCap & HWCAP_LRCPC) - flags->Set(InstructionSet_Rcpc); -#endif -#ifdef HWCAP_PMULL -// if (hwCap & HWCAP_PMULL) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_PMULL); -#endif -#ifdef HWCAP_SHA1 - if (hwCap & HWCAP_SHA1) - flags->Set(InstructionSet_Sha1); -#endif -#ifdef HWCAP_SHA2 - if (hwCap & HWCAP_SHA2) - flags->Set(InstructionSet_Sha256); -#endif -#ifdef HWCAP_SHA512 -// if (hwCap & HWCAP_SHA512) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SHA512); -#endif -#ifdef HWCAP_SHA3 -// if (hwCap & HWCAP_SHA3) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SHA3); -#endif -#ifdef HWCAP_ASIMD - if (hwCap & HWCAP_ASIMD) - { - flags->Set(InstructionSet_AdvSimd); - flags->Set(InstructionSet_VectorT128); - } -#endif -#ifdef HWCAP_ASIMDRDM - if (hwCap & HWCAP_ASIMDRDM) - flags->Set(InstructionSet_Rdm); -#endif -#ifdef HWCAP_ASIMDHP -// if (hwCap & HWCAP_ASIMDHP) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_ADVSIMD_FP16); -#endif -#ifdef HWCAP_SM3 -// if (hwCap & HWCAP_SM3) -// 
flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SM3); -#endif -#ifdef HWCAP_SM4 -// if (hwCap & HWCAP_SM4) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SM4); -#endif -#ifdef HWCAP_SVE -// if (hwCap & HWCAP_SVE) -// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_SVE); -#endif -#else // !HAVE_AUXV_HWCAP_H -#if HAVE_SYSCTLBYNAME - int64_t valueFromSysctl = 0; - size_t sz = sizeof(valueFromSysctl); - - if ((sysctlbyname("hw.optional.arm.FEAT_AES", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Aes); - - if ((sysctlbyname("hw.optional.armv8_crc32", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Crc32); - - if ((sysctlbyname("hw.optional.arm.FEAT_DotProd", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Dp); - - if ((sysctlbyname("hw.optional.arm.FEAT_RDM", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Rdm); - - if ((sysctlbyname("hw.optional.arm.FEAT_SHA1", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Sha1); - - if ((sysctlbyname("hw.optional.arm.FEAT_SHA256", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Sha256); - - if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Atomics); - - if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0)) - flags->Set(InstructionSet_Rcpc); -#endif // HAVE_SYSCTLBYNAME - // CoreCLR SIMD and FP support is included in ARM64 baseline - // On exceptional basis platforms may leave out support, but CoreCLR does not - // yet support such platforms - // Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities - flags->Set(InstructionSet_ArmBase); - 
flags->Set(InstructionSet_AdvSimd); - flags->Set(InstructionSet_VectorT128); - // flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP); -#endif // HAVE_AUXV_HWCAP_H -} -#endif // HOST_ARM64 && TARGET_ARM64 diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index c4501546d836e..aa1c443cf56f1 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -634,38 +634,6 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT NESTED_END ProfileTailcallNaked, _TEXT -;; extern "C" DWORD __stdcall xmmYmmStateSupport(); -LEAF_ENTRY xmmYmmStateSupport, _TEXT - mov ecx, 0 ; Specify xcr0 - xgetbv ; result in EDX:EAX - and eax, 06H - cmp eax, 06H ; check OS has enabled both XMM and YMM state support - jne not_supported - mov eax, 1 - jmp done - not_supported: - mov eax, 0 - done: - ret -LEAF_END xmmYmmStateSupport, _TEXT - -;; extern "C" DWORD __stdcall avx512StateSupport(); -LEAF_ENTRY avx512StateSupport, _TEXT - mov ecx, 0 ; Specify xcr0 - xgetbv ; result in EDX:EAX - and eax, 0E6H - cmp eax, 0E6H ; check OS has enabled XMM, YMM and ZMM state support - jne not_supported - mov eax, 1 - jmp done - not_supported: - mov eax, 0 - done: - ret -LEAF_END avx512StateSupport, _TEXT - - - ; EXTERN_C void moveOWord(LPVOID* src, LPVOID* target); ; ; MOVDQA is not an atomic operation. You need to call this function in a crst. diff --git a/src/coreclr/vm/amd64/unixstubs.cpp b/src/coreclr/vm/amd64/unixstubs.cpp index d5bb054c9be5b..e818594f6aa51 100644 --- a/src/coreclr/vm/amd64/unixstubs.cpp +++ b/src/coreclr/vm/amd64/unixstubs.cpp @@ -10,49 +10,6 @@ extern "C" PORTABILITY_ASSERT("Implement for PAL"); } - DWORD xmmYmmStateSupport() - { - DWORD eax; - __asm(" xgetbv\n" \ - : "=a"(eax) /*output in eax*/\ - : "c"(0) /*inputs - 0 in ecx*/\ - : "edx" /* registers that are clobbered*/ - ); - // check OS has enabled both XMM and YMM state support - return ((eax & 0x06) == 0x06) ? 
1 : 0; - } - -#ifndef XSTATE_MASK_AVX512 -#define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ -#endif // XSTATE_MASK_AVX512 - - DWORD avx512StateSupport() - { -#if defined(TARGET_OSX) - // MacOS has specialized behavior where it reports AVX512 support but doesnt - // actually enable AVX512 until the first instruction is executed and does so - // on a per thread basis. It does this by catching the faulting instruction and - // checking for the EVEX encoding. The kmov instructions, despite being part - // of the AVX512 instruction set are VEX encoded and dont trigger the enablement - // - // See https://github.com/apple/darwin-xnu/blob/main/osfmk/i386/fpu.c#L174 - - // TODO-AVX512: Enabling this for OSX requires ensuring threads explicitly trigger - // the AVX-512 enablement so that arbitrary usage doesn't cause downstream problems - - return false; -#else - DWORD eax; - __asm(" xgetbv\n" \ - : "=a"(eax) /*output in eax*/\ - : "c"(0) /*inputs - 0 in ecx*/\ - : "edx" /* registers that are clobbered*/ - ); - // check OS has enabled XMM, YMM and ZMM state support - return ((eax & 0x0E6) == 0x0E6) ? 
1 : 0; -#endif - } - void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle) { } diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 51eec2b9dae9a..defe0ca118571 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -30,6 +30,9 @@ #include "configuration.h" +#include +#include + #ifdef HOST_64BIT #define CHECK_DUPLICATED_STRUCT_LAYOUTS #include "../debug/daccess/fntableaccess.h" @@ -1260,70 +1263,6 @@ EEJitManager::EEJitManager() SetCpuInfo(); } -#if defined(TARGET_X86) || defined(TARGET_AMD64) - -bool DoesOSSupportAVX() -{ - LIMITED_METHOD_CONTRACT; - -#ifndef TARGET_UNIX - // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX is supported - typedef DWORD64 (WINAPI *PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); - if(hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return FALSE; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX) == 0) - { - return FALSE; - } -#endif // !TARGET_UNIX - - return TRUE; -} - -bool DoesOSSupportAVX512() -{ - LIMITED_METHOD_CONTRACT; - -#ifndef TARGET_UNIX - // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX512 is supported - typedef DWORD64 (WINAPI *PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); - if(hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return FALSE; - } - - DWORD64 FeatureMask = 
pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX512) == 0) - { - return FALSE; - } -#endif // !TARGET_UNIX - - return TRUE; -} - -#endif // defined(TARGET_X86) || defined(TARGET_AMD64) - #ifdef TARGET_ARM64 extern "C" DWORD64 __stdcall GetDataCacheZeroIDReg(); #endif @@ -1338,531 +1277,262 @@ void EEJitManager::SetCpuInfo() CORJIT_FLAGS CPUCompileFlags; - // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits - uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; + int cpuFeatures = minipal_getcpufeatures(); #if defined(TARGET_X86) || defined(TARGET_AMD64) - CPUCompileFlags.Set(InstructionSet_X86Base); - - // NOTE: The below checks are based on the information reported by - // Intel® 64 and IA-32 Architectures Software Developer’s Manual. Volume 2 - // and - // AMD64 Architecture Programmer’s Manual. Volume 3 - // For more information, please refer to the CPUID instruction in the respective manuals - - union XarchCpuInfo - { - struct { - uint32_t SteppingId : 4; - uint32_t Model : 4; - uint32_t FamilyId : 4; - uint32_t ProcessorType : 2; - uint32_t Reserved1 : 2; // Unused bits in the CPUID result - uint32_t ExtendedModelId : 4; - uint32_t ExtendedFamilyId : 8; - uint32_t Reserved : 4; // Unused bits in the CPUID result - }; - - uint32_t Value; - } xarchCpuInfo; - - int cpuidInfo[4]; - - const int CPUID_EAX = 0; - const int CPUID_EBX = 1; - const int CPUID_ECX = 2; - const int CPUID_EDX = 3; - - __cpuid(cpuidInfo, 0x00000000); - - uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]); - _ASSERTE(maxCpuId >= 1); - - bool isGenuineIntel = (cpuidInfo[CPUID_EBX] == 0x756E6547) && // Genu - (cpuidInfo[CPUID_EDX] == 0x49656E69) && // ineI - (cpuidInfo[CPUID_ECX] == 0x6C65746E); // ntel - - __cpuid(cpuidInfo, 0x00000001); - _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 15)) != 0); // CMOV - - xarchCpuInfo.Value = cpuidInfo[CPUID_EAX]; #if defined(TARGET_X86) && 
!defined(TARGET_WINDOWS) // Linux may still support no SSE/SSE2 for 32-bit - if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0) + if ((cpuFeatures & XArchIntrinsicConstants_VectorT128) == 0) { - EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE is not supported on the processor.")); - } - if ((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0) - { - EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE2 is not supported on the processor.")); + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("SSE and SSE2 processor support required.")); } #else - _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0); // SSE - _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 26)) != 0); // SSE2 + _ASSERTE((cpuFeatures & XArchIntrinsicConstants_VectorT128) != 0); #endif - CPUCompileFlags.Set(InstructionSet_SSE); - CPUCompileFlags.Set(InstructionSet_SSE2); CPUCompileFlags.Set(InstructionSet_VectorT128); - if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI - { - CPUCompileFlags.Set(InstructionSet_AES); - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ - { - CPUCompileFlags.Set(InstructionSet_PCLMULQDQ); - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3 - { - CPUCompileFlags.Set(InstructionSet_SSE3); - - if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3 - { - CPUCompileFlags.Set(InstructionSet_SSSE3); - - if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1 - { - CPUCompileFlags.Set(InstructionSet_SSE41); - - if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2 - { - CPUCompileFlags.Set(InstructionSet_SSE42); - - if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE - { - CPUCompileFlags.Set(InstructionSet_MOVBE); - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT - { - CPUCompileFlags.Set(InstructionSet_POPCNT); - } - - const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE - | (1 << 28); // AVX - - if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags) - { - if(DoesOSSupportAVX() && 
(xmmYmmStateSupport() == 1)) // XGETBV == 11 - { - CPUCompileFlags.Set(InstructionSet_AVX); - - if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA - { - CPUCompileFlags.Set(InstructionSet_FMA); - } - - if (maxCpuId >= 0x07) - { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - - if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 - { - CPUCompileFlags.Set(InstructionSet_AVX2); - - if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) - { - // We allow 256-bit Vector<T> by default - CPUCompileFlags.Clear(InstructionSet_VectorT128); - CPUCompileFlags.Set(InstructionSet_VectorT256); - } - - if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 - { - if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F - { - CPUCompileFlags.Set(InstructionSet_AVX512F); - - // TODO-XArch: Add support for 512-bit Vector<T> - assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512)); - - bool isAVX512_VLSupported = false; - if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL - { - CPUCompileFlags.Set(InstructionSet_AVX512F_VL); - isAVX512_VLSupported = true; - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW - { - CPUCompileFlags.Set(InstructionSet_AVX512BW); - if (isAVX512_VLSupported) // AVX512BW_VL - { - CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD - { - CPUCompileFlags.Set(InstructionSet_AVX512CD); - if (isAVX512_VLSupported) // AVX512CD_VL - { - CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); - } - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ - { - CPUCompileFlags.Set(InstructionSet_AVX512DQ); - if (isAVX512_VLSupported) // AVX512DQ_VL - { - CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); - } - } - - if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI - { - CPUCompileFlags.Set(InstructionSet_AVX512VBMI); - if (isAVX512_VLSupported) // AVX512VBMI_VL - { - CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL); - } - } - } - } - - 
__cpuidex(cpuidInfo, 0x00000007, 0x00000001); - - if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI - { - CPUCompileFlags.Set(InstructionSet_AVXVNNI); - } - } - } - } - } - } - } - } - } - - if (maxCpuId >= 0x07) - { - __cpuidex(cpuidInfo, 0x00000007, 0x00000000); - - if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1 - { - CPUCompileFlags.Set(InstructionSet_BMI1); - } - - if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2 - { - CPUCompileFlags.Set(InstructionSet_BMI2); - } - - if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0) - { - CPUCompileFlags.Set(InstructionSet_X86Serialize); // SERIALIZE - } - } - - __cpuid(cpuidInfo, 0x80000000); - uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]); - - if (maxCpuIdEx >= 0x80000001) - { - __cpuid(cpuidInfo, 0x80000001); - - if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT - { - CPUCompileFlags.Set(InstructionSet_LZCNT); - } - } -#endif // defined(TARGET_X86) || defined(TARGET_AMD64) - -#if defined(TARGET_ARM64) -#if defined(TARGET_UNIX) - PAL_GetJitCpuCapabilityFlags(&CPUCompileFlags); - - // For HOST_ARM64, if OS has exposed mechanism to detect CPU capabilities, make sure it has AdvSimd capability. - // For other cases i.e. if !HOST_ARM64 but TARGET_ARM64 or HOST_ARM64 but OS doesn't expose way to detect - // CPU capabilities, we always enable AdvSimd flags by default. 
- // - if (!CPUCompileFlags.IsSet(InstructionSet_AdvSimd)) - { - EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd is not supported on the processor.")); - } -#elif defined(HOST_64BIT) - // FP and SIMD support are enabled by default - CPUCompileFlags.Set(InstructionSet_ArmBase); - CPUCompileFlags.Set(InstructionSet_AdvSimd); - CPUCompileFlags.Set(InstructionSet_VectorT128); + // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits + uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; - // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30) - if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) + if (((cpuFeatures & XArchIntrinsicConstants_VectorT256) != 0) && ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256))) { - CPUCompileFlags.Set(InstructionSet_Aes); - CPUCompileFlags.Set(InstructionSet_Sha1); - CPUCompileFlags.Set(InstructionSet_Sha256); - } - // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE (31) - if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) - { - CPUCompileFlags.Set(InstructionSet_Crc32); + // We allow 256-bit Vector<T> by default + CPUCompileFlags.Clear(InstructionSet_VectorT128); + CPUCompileFlags.Set(InstructionSet_VectorT256); } -// Older version of SDK would return false for these intrinsics -// but make sure we pass the right values to the APIs -#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34 -#endif -#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 -#endif -#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 -#endif - - // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE (34) - if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE)) - { - CPUCompileFlags.Set(InstructionSet_Atomics); - } + // TODO-XArch: Add support for 512-bit Vector<T> + 
_ASSERTE(!CPUCompileFlags.IsSet(InstructionSet_VectorT512)); - // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE (43) - if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { - CPUCompileFlags.Set(InstructionSet_Dp); + CPUCompileFlags.Set(InstructionSet_X86Base); } - // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE (45) - if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE)) + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE)) { - CPUCompileFlags.Set(InstructionSet_Rcpc); + CPUCompileFlags.Set(InstructionSet_SSE); } -#endif // HOST_64BIT - if (GetDataCacheZeroIDReg() == 4) + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2)) { - // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1). - // DCZID_EL0<3:0> (BS) specifies Log2 of the block size in words. - // - // We set the flag when the instruction is permitted and the block size is 64 bytes. - CPUCompileFlags.Set(InstructionSet_Dczva); + CPUCompileFlags.Set(InstructionSet_SSE2); } - if (CPUCompileFlags.IsSet(InstructionSet_Atomics)) + if (((cpuFeatures & XArchIntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES)) { - g_arm64_atomics_present = true; + CPUCompileFlags.Set(InstructionSet_AES); } -#endif // TARGET_ARM64 - - // Now that we've queried the actual hardware support, we need to adjust what is actually supported based - // on some externally available config switches that exist so users can test code for downlevel hardware. 
-#if defined(TARGET_X86) || defined(TARGET_AMD64) - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX)) { - CPUCompileFlags.Clear(InstructionSet_X86Base); + CPUCompileFlags.Set(InstructionSet_AVX); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2)) { - CPUCompileFlags.Clear(InstructionSet_AES); + CPUCompileFlags.Set(InstructionSet_AVX2); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) { - CPUCompileFlags.Clear(InstructionSet_AVX); + CPUCompileFlags.Set(InstructionSet_AVX512F); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512f_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) { - CPUCompileFlags.Clear(InstructionSet_AVX2); + CPUCompileFlags.Set(InstructionSet_AVX512F_VL); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) { - CPUCompileFlags.Clear(InstructionSet_AVX512F); + CPUCompileFlags.Set(InstructionSet_AVX512BW); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) { - CPUCompileFlags.Clear(InstructionSet_AVX512F_VL); + CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd) != 0) && 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) { - CPUCompileFlags.Clear(InstructionSet_AVX512BW); + CPUCompileFlags.Set(InstructionSet_AVX512CD); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) { - CPUCompileFlags.Clear(InstructionSet_AVX512BW_VL); + CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) { - CPUCompileFlags.Clear(InstructionSet_AVX512CD); + CPUCompileFlags.Set(InstructionSet_AVX512DQ); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) { - CPUCompileFlags.Clear(InstructionSet_AVX512CD_VL); + CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI)) { - CPUCompileFlags.Clear(InstructionSet_AVX512DQ); + CPUCompileFlags.Set(InstructionSet_AVX512VBMI); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL)) { - CPUCompileFlags.Clear(InstructionSet_AVX512DQ_VL); + CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI)) + if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) { - CPUCompileFlags.Clear(InstructionSet_AVX512VBMI); + 
CPUCompileFlags.Set(InstructionSet_AVXVNNI); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL)) + if (((cpuFeatures & XArchIntrinsicConstants_Bmi1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI1)) { - CPUCompileFlags.Clear(InstructionSet_AVX512VBMI_VL); + CPUCompileFlags.Set(InstructionSet_BMI1); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI)) + if (((cpuFeatures & XArchIntrinsicConstants_Bmi2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI2)) { - CPUCompileFlags.Clear(InstructionSet_AVXVNNI); + CPUCompileFlags.Set(InstructionSet_BMI2); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI1)) + if (((cpuFeatures & XArchIntrinsicConstants_Fma) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableFMA)) { - CPUCompileFlags.Clear(InstructionSet_BMI1); + CPUCompileFlags.Set(InstructionSet_FMA); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI2)) + if (((cpuFeatures & XArchIntrinsicConstants_Lzcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableLZCNT)) { - CPUCompileFlags.Clear(InstructionSet_BMI2); + CPUCompileFlags.Set(InstructionSet_LZCNT); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableFMA)) + if (((cpuFeatures & XArchIntrinsicConstants_Pclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ)) { - CPUCompileFlags.Clear(InstructionSet_FMA); + CPUCompileFlags.Set(InstructionSet_PCLMULQDQ); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableLZCNT)) + if (((cpuFeatures & XArchIntrinsicConstants_Movbe) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE)) { - CPUCompileFlags.Clear(InstructionSet_LZCNT); + CPUCompileFlags.Set(InstructionSet_MOVBE); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ)) + if (((cpuFeatures & XArchIntrinsicConstants_Popcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT)) { - 
CPUCompileFlags.Clear(InstructionSet_PCLMULQDQ); + CPUCompileFlags.Set(InstructionSet_POPCNT); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE)) + // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that + // is a prexisting config flag that controls the SSE3+ ISAs + if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) && + CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) && + CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4)) { - CPUCompileFlags.Clear(InstructionSet_MOVBE); + CPUCompileFlags.Set(InstructionSet_SSE3); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT)) + if (((cpuFeatures & XArchIntrinsicConstants_Sse41) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41)) { - CPUCompileFlags.Clear(InstructionSet_POPCNT); + CPUCompileFlags.Set(InstructionSet_SSE41); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE)) + if (((cpuFeatures & XArchIntrinsicConstants_Sse42) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42)) { - CPUCompileFlags.Clear(InstructionSet_SSE); + CPUCompileFlags.Set(InstructionSet_SSE42); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2)) + if (((cpuFeatures & XArchIntrinsicConstants_Ssse3) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3)) { - CPUCompileFlags.Clear(InstructionSet_SSE2); + CPUCompileFlags.Set(InstructionSet_SSSE3); } - // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that - // is a prexisting config flag that controls the SSE3+ ISAs - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) || - !CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4)) + if (((cpuFeatures & XArchIntrinsicConstants_Serialize) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableX86Serialize)) { - CPUCompileFlags.Clear(InstructionSet_SSE3); + CPUCompileFlags.Set(InstructionSet_X86Serialize); } +#elif defined(TARGET_ARM64) - if 
(!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41)) +#if !defined(TARGET_WINDOWS) + // Linux may still support no AdvSimd + if ((cpuFeatures & ARM64IntrinsicConstants_VectorT128) == 0) { - CPUCompileFlags.Clear(InstructionSet_SSE41); + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd processor support required.")); } +#else + _ASSERTE((cpuFeatures & ARM64IntrinsicConstants_VectorT128) != 0); +#endif - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42)) - { - CPUCompileFlags.Clear(InstructionSet_SSE42); - } + CPUCompileFlags.Set(InstructionSet_VectorT128); - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3)) + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { - CPUCompileFlags.Clear(InstructionSet_SSSE3); + CPUCompileFlags.Set(InstructionSet_ArmBase); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableX86Serialize)) - { - CPUCompileFlags.Clear(InstructionSet_X86Serialize); - } -#elif defined(TARGET_ARM64) - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) + if (((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64AdvSimd)) { - CPUCompileFlags.Clear(InstructionSet_ArmBase); + CPUCompileFlags.Set(InstructionSet_AdvSimd); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64AdvSimd)) + if (((cpuFeatures & ARM64IntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Aes)) { - CPUCompileFlags.Clear(InstructionSet_AdvSimd); + CPUCompileFlags.Set(InstructionSet_Aes); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Aes)) + if (((cpuFeatures & ARM64IntrinsicConstants_Atomics) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Atomics)) { - CPUCompileFlags.Clear(InstructionSet_Aes); + CPUCompileFlags.Set(InstructionSet_Atomics); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Atomics)) + if 
(((cpuFeatures & ARM64IntrinsicConstants_Rcpc) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc)) { - CPUCompileFlags.Clear(InstructionSet_Atomics); + CPUCompileFlags.Set(InstructionSet_Rcpc); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc)) + if (((cpuFeatures & ARM64IntrinsicConstants_Crc32) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32)) { - CPUCompileFlags.Clear(InstructionSet_Rcpc); + CPUCompileFlags.Set(InstructionSet_Crc32); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32)) + if (((cpuFeatures & ARM64IntrinsicConstants_Dp) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dp)) { - CPUCompileFlags.Clear(InstructionSet_Crc32); + CPUCompileFlags.Set(InstructionSet_Dp); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dczva)) + if (((cpuFeatures & ARM64IntrinsicConstants_Rdm) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rdm)) { - CPUCompileFlags.Clear(InstructionSet_Dczva); + CPUCompileFlags.Set(InstructionSet_Rdm); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dp)) + if (((cpuFeatures & ARM64IntrinsicConstants_Sha1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha1)) { - CPUCompileFlags.Clear(InstructionSet_Dp); + CPUCompileFlags.Set(InstructionSet_Sha1); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rdm)) + if (((cpuFeatures & ARM64IntrinsicConstants_Sha256) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha256)) { - CPUCompileFlags.Clear(InstructionSet_Rdm); + CPUCompileFlags.Set(InstructionSet_Sha256); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha1)) + // DCZID_EL0<4> (DZP) indicates whether use of DC ZVA instructions is permitted (0) or prohibited (1). + // DCZID_EL0<3:0> (BS) specifies Log2 of the block size in words. 
+ // + // We set the flag when the instruction is permitted and the block size is 64 bytes. + if ((GetDataCacheZeroIDReg() == 4) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Dczva)) { - CPUCompileFlags.Clear(InstructionSet_Sha1); + CPUCompileFlags.Set(InstructionSet_Dczva); } - if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Sha256)) + if ((cpuFeatures & ARM64IntrinsicConstants_Atomics) != 0) { - CPUCompileFlags.Clear(InstructionSet_Sha256); + g_arm64_atomics_present = true; } #endif -#if defined(TARGET_LOONGARCH64) - // TODO-LoongArch64: set LoongArch64's InstructionSet features ! -#endif // TARGET_LOONGARCH64 - // These calls are very important as it ensures the flags are consistent with any // removals specified above. This includes removing corresponding 64-bit ISAs // and any other implications such as SSE2 depending on SSE or AdvSimd on ArmBase @@ -1871,8 +1541,42 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.EnsureValidInstructionSetSupport(); #if defined(TARGET_X86) || defined(TARGET_AMD64) + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + const int CPUID_EDX = 3; + + __cpuid(cpuidInfo, 0x00000000); + + bool isGenuineIntel = (cpuidInfo[CPUID_EBX] == 0x756E6547) && // Genu + (cpuidInfo[CPUID_EDX] == 0x49656E69) && // ineI + (cpuidInfo[CPUID_ECX] == 0x6C65746E); // ntel + if (isGenuineIntel) { + union XarchCpuInfo + { + struct { + uint32_t SteppingId : 4; + uint32_t Model : 4; + uint32_t FamilyId : 4; + uint32_t ProcessorType : 2; + uint32_t Reserved1 : 2; // Unused bits in the CPUID result + uint32_t ExtendedModelId : 4; + uint32_t ExtendedFamilyId : 8; + uint32_t Reserved : 4; // Unused bits in the CPUID result + }; + + uint32_t Value; + } xarchCpuInfo; + + __cpuid(cpuidInfo, 0x00000001); + _ASSERTE((cpuidInfo[CPUID_EDX] & (1 << 15)) != 0); // CMOV + + xarchCpuInfo.Value = cpuidInfo[CPUID_EAX]; + // Some architectures can experience frequency throttling when executing 
// executing 512-bit width instructions. To account for this we set the // default preferred vector width to 256-bits in some scenarios. Power diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index e51a324813ef4..08360e9ff0c06 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -22,6 +22,8 @@ #include "eventtrace.h" #include "threadsuspend.h" +#include + #if defined(_DEBUG) && !defined (WRITE_BARRIER_CHECK) #define WRITE_BARRIER_CHECK 1 #endif diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index b91b37bf88580..a606bfbe76b4d 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -99,60 +99,18 @@ static uint32_t avx512StateSupport() return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 1 : 0; } -static HMODULE LoadKernel32dll() -{ - return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); -} - static bool IsAvxEnabled() { - typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = LoadKernel32dll(); - if (hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - { - return FALSE; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX) == 0) - { - return FALSE; - } - - return TRUE; + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & XSTATE_MASK_AVX) != 0); } static bool IsAvx512Enabled() { - typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); - PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - - HMODULE hMod = LoadKernel32dll(); - if (hMod == NULL) - return FALSE; - - pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hMod, "GetEnabledXStateFeatures"); - - if (pfnGetEnabledXStateFeatures == NULL) - 
{ - return FALSE; - } - - DWORD64 FeatureMask = pfnGetEnabledXStateFeatures(); - if ((FeatureMask & XSTATE_MASK_AVX512) == 0) - { - return FALSE; - } - - return TRUE; + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } + #endif // defined(TARGET_X86) || defined(TARGET_AMD64) #endif // TARGET_WINDOWS @@ -524,18 +482,6 @@ int minipal_getcpufeatures(void) #endif // TARGET_UNIX #if defined(TARGET_WINDOWS) -// Older version of SDK would return false for these intrinsics -// but make sure we pass the right values to the APIs -#ifndef PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE 34 -#endif -#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 -#endif -#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45 -#endif - // FP and SIMD support are enabled by default result |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; diff --git a/src/native/minipal/cpuid.h b/src/native/minipal/cpuid.h index b8a6bd23b65f4..f6d39ba6783d8 100644 --- a/src/native/minipal/cpuid.h +++ b/src/native/minipal/cpuid.h @@ -4,15 +4,15 @@ #ifndef HAVE_MINIPAL_CPUID_H #define HAVE_MINIPAL_CPUID_H -#if defined(TARGET_X86) || defined(TARGET_AMD64) +#if defined(HOST_X86) || defined(HOST_AMD64) -#if defined(TARGET_WINDOWS) +#if defined(HOST_WINDOWS) #include <intrin.h> -#endif // TARGET_WINDOWS +#endif // HOST_WINDOWS -#if defined(TARGET_UNIX) +#if defined(HOST_UNIX) // MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures // We define matching signatures for use on Unix platforms. 
// @@ -48,7 +48,7 @@ static void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id) void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id); #endif -#endif // TARGET_UNIX -#endif // defined(TARGET_X86) || defined(TARGET_AMD64) +#endif // HOST_UNIX +#endif // defined(HOST_X86) || defined(HOST_AMD64) #endif