From 9f8f67cc0bcddb626bb8aa744b5640d588baecdf Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 13:41:46 -0700 Subject: [PATCH 01/14] Add APX doc. --- docs/design/features/xarch-apx.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/design/features/xarch-apx.md diff --git a/docs/design/features/xarch-apx.md b/docs/design/features/xarch-apx.md new file mode 100644 index 0000000000000..8997da56fbdb8 --- /dev/null +++ b/docs/design/features/xarch-apx.md @@ -0,0 +1,3 @@ +# APX Integration in .NET + +Let's keep documentation on APX integration and notes on things here. I will evolve this as necessary. \ No newline at end of file From a68865ca98c60e0bf8cb32443536c26bbad701dd Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 13:46:02 -0700 Subject: [PATCH 02/14] script-gen changes. --- src/coreclr/inc/corinfoinstructionset.h | 146 +++++++++------- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 1 + .../Runtime/ReadyToRunInstructionSet.cs | 1 + .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 + .../JitInterface/CorInfoInstructionSet.cs | 157 +++++++++++------- .../ThunkGenerator/InstructionSetDesc.txt | 4 + 7 files changed, 190 insertions(+), 133 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 43f2be795314f..09ac2d44e3af1 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -81,38 +81,40 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - 
InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512F_VL_X64=60, - InstructionSet_AVX512BW_X64=61, - InstructionSet_AVX512BW_VL_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512CD_VL_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512DQ_VL_X64=66, - InstructionSet_AVX512VBMI_X64=67, - InstructionSet_AVX512VBMI_VL_X64=68, - InstructionSet_AVX10v1_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512F_VL_X64=61, + InstructionSet_AVX512BW_X64=62, + InstructionSet_AVX512BW_VL_X64=63, + InstructionSet_AVX512CD_X64=64, + InstructionSet_AVX512CD_VL_X64=65, + InstructionSet_AVX512DQ_X64=66, + InstructionSet_AVX512DQ_VL_X64=67, + InstructionSet_AVX512VBMI_X64=68, + InstructionSet_AVX512VBMI_VL_X64=69, + InstructionSet_AVX10v1_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -153,38 +155,40 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - 
InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512F_VL_X64=60, - InstructionSet_AVX512BW_X64=61, - InstructionSet_AVX512BW_VL_X64=62, - InstructionSet_AVX512CD_X64=63, - InstructionSet_AVX512CD_VL_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512DQ_VL_X64=66, - InstructionSet_AVX512VBMI_X64=67, - InstructionSet_AVX512VBMI_VL_X64=68, - InstructionSet_AVX10v1_X64=69, - InstructionSet_AVX10v1_V512_X64=70, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512F_VL_X64=61, + InstructionSet_AVX512BW_X64=62, + InstructionSet_AVX512BW_VL_X64=63, + InstructionSet_AVX512CD_X64=64, + InstructionSet_AVX512CD_VL_X64=65, + InstructionSet_AVX512DQ_X64=66, + InstructionSet_AVX512DQ_VL_X64=67, + 
InstructionSet_AVX512VBMI_X64=68, + InstructionSet_AVX512VBMI_VL_X64=69, + InstructionSet_AVX10v1_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_APX_X64=72, #endif // TARGET_X86 }; @@ -364,6 +368,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v1_X64); if (HasInstructionSet(InstructionSet_AVX10v1_V512)) AddInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet_APX)) + AddInstructionSet(InstructionSet_APX_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -572,6 +578,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_APX) && !resultflags.HasInstructionSet(InstructionSet_APX_X64)) + resultflags.RemoveInstructionSet(InstructionSet_APX); + if (resultflags.HasInstructionSet(InstructionSet_APX_X64) && !resultflags.HasInstructionSet(InstructionSet_APX)) + resultflags.RemoveInstructionSet(InstructionSet_APX_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -990,6 +1000,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_APX : + return "APX"; + case InstructionSet_APX_X64 : + return "APX_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1068,6 +1082,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return 
"VectorT256"; case InstructionSet_VectorT512 : return "VectorT512"; + case InstructionSet_APX : + return "APX"; #endif // TARGET_X86 default: @@ -1138,6 +1154,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1175,6 +1192,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; + case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 6aef89b2afd76..b307e4659215e 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 7f7fd340-4779-455a-8046-628f3cd8c3c7 */ - 0x7f7fd340, - 0x4779, - 0x455a, - {0x80, 0x46, 0x62, 0x8f, 0x3c, 0xd8, 0xc3, 0xc7} +constexpr GUID JITEEVersionIdentifier = { /* c3886dcb-d533-44b8-86c0-ff79ac4ce9df */ + 0xc3886dcb, + 0xd533, + 0x44b8, + {0x86, 0xc0, 0xff, 0x79, 0xac, 0x4c, 0xe9, 0xdf} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 4ad8c6b4e5912..434e9bbd07bed 100644 --- 
a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -55,6 +55,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx10v1=44, READYTORUN_INSTRUCTION_Avx10v1_V512=46, READYTORUN_INSTRUCTION_EVEX=47, + READYTORUN_INSTRUCTION_Apx=48, }; diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index dd6a57731444e..fe151f54a7369 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -58,6 +58,7 @@ public enum ReadyToRunInstructionSet Avx10v1=44, Avx10v1_V512=46, EVEX=47, + Apx=48, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 361aa92bea955..ed236ca746931 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -127,6 +127,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; + case InstructionSet.X64_APX_X64: return ReadyToRunInstructionSet.Apx; default: throw new Exception("Unknown instruction set"); } @@ -206,6 +208,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; + case InstructionSet.X86_APX: return 
ReadyToRunInstructionSet.Apx; + case InstructionSet.X86_APX_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 42807bfcec1d9..366d974190407 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -79,6 +79,7 @@ public enum InstructionSet X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, + X64_APX = InstructionSet_X64.APX, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -111,6 +112,7 @@ public enum InstructionSet X64_AVX512VBMI_VL_X64 = InstructionSet_X64.AVX512VBMI_VL_X64, X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, + X64_APX_X64 = InstructionSet_X64.APX_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -149,6 +151,7 @@ public enum InstructionSet X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = InstructionSet_X86.VectorT512, + X86_APX = InstructionSet_X86.APX, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -181,6 +184,7 @@ public enum InstructionSet X86_AVX512VBMI_VL_X64 = InstructionSet_X86.AVX512VBMI_VL_X64, X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, + X86_APX_X64 = InstructionSet_X86.APX_X64, } public enum InstructionSet_ARM64 { @@ -255,38 +259,40 @@ public enum InstructionSet_X64 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - 
SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512F_VL_X64 = 60, - AVX512BW_X64 = 61, - AVX512BW_VL_X64 = 62, - AVX512CD_X64 = 63, - AVX512CD_VL_X64 = 64, - AVX512DQ_X64 = 65, - AVX512DQ_VL_X64 = 66, - AVX512VBMI_X64 = 67, - AVX512VBMI_VL_X64 = 68, - AVX10v1_X64 = 69, - AVX10v1_V512_X64 = 70, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512F_VL_X64 = 61, + AVX512BW_X64 = 62, + AVX512BW_VL_X64 = 63, + AVX512CD_X64 = 64, + AVX512CD_VL_X64 = 65, + AVX512DQ_X64 = 66, + AVX512DQ_VL_X64 = 67, + AVX512VBMI_X64 = 68, + AVX512VBMI_VL_X64 = 69, + AVX10v1_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public enum InstructionSet_X86 @@ -331,38 +337,40 @@ public enum InstructionSet_X86 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512F_VL_X64 = 60, - AVX512BW_X64 = 61, - AVX512BW_VL_X64 = 62, - AVX512CD_X64 = 63, - AVX512CD_VL_X64 = 64, - AVX512DQ_X64 = 65, - AVX512DQ_VL_X64 = 66, - AVX512VBMI_X64 = 67, - AVX512VBMI_VL_X64 = 68, - AVX10v1_X64 = 
69, - AVX10v1_V512_X64 = 70, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512F_VL_X64 = 61, + AVX512BW_X64 = 62, + AVX512BW_VL_X64 = 63, + AVX512CD_X64 = 64, + AVX512CD_VL_X64 = 65, + AVX512DQ_X64 = 66, + AVX512DQ_VL_X64 = 67, + AVX512VBMI_X64 = 68, + AVX512VBMI_VL_X64 = 69, + AVX10v1_X64 = 70, + AVX10v1_V512_X64 = 71, + APX_X64 = 72, } public unsafe struct InstructionSetFlags : IEnumerable @@ -710,6 +718,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX)) + resultflags.AddInstructionSet(InstructionSet.X64_APX_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1051,6 +1063,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_APX_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_APX); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) 
resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1376,6 +1390,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true); break; case TargetArchitecture.X86: @@ -1417,6 +1432,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); + yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); break; } } @@ -1512,6 +1528,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v1_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + if (HasInstructionSet(InstructionSet.X64_APX)) + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1569,6 +1587,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X64_APX_X64); break; case TargetArchitecture.X86: @@ -1604,6 +1623,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc 
AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); + AddInstructionSet(InstructionSet.X86_APX_X64); break; } } @@ -1908,6 +1928,12 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "VectorT512": { return InstructionSet.X64_VectorT512; } + case "Apx": + if (nestedTypeName == "X64") + { return InstructionSet.X64_APX_X64; } + else + { return InstructionSet.X64_APX; } + } break; @@ -2020,6 +2046,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "VectorT512": { return InstructionSet.X86_VectorT512; } + case "Apx": + { return InstructionSet.X86_APX; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index dbb8e6efd20ad..cd4e55bdca945 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -22,6 +22,8 @@ ; DO NOT CHANGE R2R NUMERIC VALUES OF THE EXISTING SETS. Changing R2R numeric values definitions would be R2R format breaking change. +; The ISA definitions should also be mapped to `hwintrinsicIsaRangeArray` in hwintrinsic.cpp.
+ ; Definition of X86 instruction sets definearch ,X86 ,32Bit ,X64, X64 @@ -63,6 +65,7 @@ instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1_V512 instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 +instructionset ,X86 ,Apx , ,48 ,APX ,apx instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -96,6 +99,7 @@ instructionset64bit,X86 ,AVX512VBMI instructionset64bit,X86 ,AVX512VBMI_VL instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V512 +instructionset64bit,X86 ,APX vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 From 6dfb92b1085d40f991e32164fdfef9e1c9ac6180 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 14:16:05 -0700 Subject: [PATCH 03/14] XSTATE changes --- src/coreclr/jit/hwintrinsic.cpp | 1 + .../Runtime/windows/PalRedhawkMinWin.cpp | 10 ++- src/coreclr/pal/inc/pal.h | 21 +++++ src/coreclr/pal/src/arch/amd64/asmconstants.h | 5 +- src/coreclr/pal/src/arch/amd64/context2.S | 23 +++++ src/coreclr/pal/src/include/pal/context.h | 32 ++++++- src/coreclr/pal/src/thread/context.cpp | 85 +++++++++++++++++++ src/coreclr/vm/amd64/asmconstants.h | 2 +- src/coreclr/vm/threadsuspend.cpp | 9 +- 9 files changed, 179 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index cd8ac360b6ba6..4ee918637c42b 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -804,6 +804,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 + { NI_Illegal, NI_Illegal }, // APX { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp 
b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 86013f7a964d2..f4aae9000cef7 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -37,6 +37,10 @@ #define REDHAWK_PALEXPORT extern "C" #define REDHAWK_PALAPI __stdcall +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + // Index for the fiber local storage of the attached thread pointer static uint32_t g_flsIndex = FLS_OUT_OF_INDEXES; @@ -541,7 +545,7 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB #endif //TARGET_X86 #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; @@ -632,9 +636,9 @@ REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetCompleteThreadCont // This should not normally fail. // The system silently ignores any feature specified in the FeatureMask which is not enabled on the processor. 
#if defined(TARGET_X86) || defined(TARGET_AMD64) - if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) + if (!SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX)) { - _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512"); + _ASSERTE(!"Could not apply XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX"); return FALSE; } #elif defined(TARGET_ARM64) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index e4a520c1dcf5c..209e71420e6e1 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1626,6 +1626,27 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm30; M512 Zmm31; }; + + struct + { + DWORD64 Egpr16; + DWORD64 Egpr17; + DWORD64 Egpr18; + DWORD64 Egpr19; + DWORD64 Egpr20; + DWORD64 Egpr21; + DWORD64 Egpr22; + DWORD64 Egpr23; + DWORD64 Egpr24; + DWORD64 Egpr25; + DWORD64 Egpr26; + DWORD64 Egpr27; + DWORD64 Egpr28; + DWORD64 Egpr29; + DWORD64 Egpr30; + DWORD64 Egpr31; + }; + } CONTEXT, *PCONTEXT, *LPCONTEXT; // diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index d5a72cf6eda23..ed63a88e0e817 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -8,12 +8,14 @@ #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (1 << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((1 << (XSTATE_AVX512_KMASK)) | \ (1 << (XSTATE_AVX512_ZMM_H)) | \ (1 << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (1 << (XSTATE_APX)) // The arch bit is normally set in the flag constants below. Since this is already arch-specific code and the arch bit is not // relevant, the arch bit is excluded from the flag constants below for simpler tests. 
@@ -91,7 +93,8 @@ #define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16) #define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8) #define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16) -#define CONTEXT_Size CONTEXT_Zmm16+(64*16) +#define CONTEXT_Egpr CONTEXT_Zmm16+(64*16) +#define CONTEXT_Size CONTEXT_Egpr+(16*8) #else // HOST_64BIT diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index dba772f9dbbf5..ad7e9b35ad528 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,6 +183,29 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] + // TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, + // we are currently using bare value to hack it through the build process, and test the implementation through CI. + // those changes will be removed when we have the OS support for APX. + test DWORD PTR [rdi + CONTEXT_XStateFeaturesMask], 524288 + je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) + + mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] + mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] + mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] + mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] + mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] + mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] + mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] + mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] + mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] + mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] + mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] + mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] + mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] + mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] + mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] + mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] + LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): test BYTE PTR [rdi +
CONTEXT_ContextFlags], CONTEXT_CONTROL diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 932b40c52a821..7a12d2a72ab56 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -58,6 +58,7 @@ using asm_sigcontext::_xstate; #if defined(XSTATE_SUPPORTED) || (defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS)) bool Xstate_IsAvx512Supported(); +bool Xstate_IsApxSupported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) #if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) @@ -467,6 +468,14 @@ struct sve_context { #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM) #endif // XFEATURE_MASK_AVX512 +#ifndef XSTATE_APX +#define XSTATE_APX 19 +#endif // XSTATE_APX + +#ifndef XFEATURE_MASK_APX +#define XFEATURE_MASK_APX (1 << XSTATE_APX) +#endif // XFEATURE_MASK_APX + #if HAVE__FPX_SW_BYTES_WITH_XSTATE_BV #define FPREG_FpxSwBytes_xfeatures(uc) FPREG_FpxSwBytes(uc)->xstate_bv #else @@ -489,7 +498,7 @@ struct Xstate_ExtendedFeature uint32_t size; }; -#define Xstate_ExtendedFeatures_Count (XSTATE_AVX512_ZMM + 1) +#define Xstate_ExtendedFeatures_Count (XSTATE_APX + 1) extern Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; inline _fpx_sw_bytes *FPREG_FpxSwBytes(const ucontext_t *uc) @@ -626,6 +635,27 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } + +inline bool FPREG_HasApxRegisters(const ucontext_t *uc) +{ + if (!FPREG_HasExtendedState(uc)) + { + return false; + } + + if ((FPREG_FpxSwBytes_xfeatures(uc) & XFEATURE_MASK_APX) != XFEATURE_MASK_APX) + { + return false; + } + + return Xstate_IsApxSupported(); +} + +inline void *FPREG_Xstate_Egpr(const ucontext_t *uc, uint32_t *featureSize) 
+{ + _ASSERTE(FPREG_HasApxRegisters(uc)); + return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_APX); +} #endif // XSTATE_SUPPORTED && HOST_AMD64 ///////////////////// diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 5154ee3c8800f..13ae3d037fac7 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -313,6 +313,17 @@ typedef int __ptrace_request; ASSIGN_CONTROL_REGS \ ASSIGN_INTEGER_REGS \ +// Fallback APX XSTATE definitions for toolchains whose headers do not provide them yet. +#if defined(HOST_AMD64) && defined(XSTATE_SUPPORTED) +#ifndef XSTATE_APX +#define XSTATE_APX (19) +#endif // XSTATE_APX + +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (1 << XSTATE_APX) +#endif // XSTATE_MASK_APX +#endif // HOST_AMD64 && XSTATE_SUPPORTED + #if defined(XSTATE_SUPPORTED) || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) bool Xstate_IsAvx512Supported() { @@ -380,6 +391,59 @@ bool Xstate_IsAvx512Supported() return Xstate_Avx512Supported == 1; #endif } + +bool Xstate_IsApxSupported() +{ +#if defined(HAVE_MACH_EXCEPTIONS) + // TODO-xarch-apx: I assume OSX will never support APX + return false; +#else + static int Xstate_ApxSupported = -1; + + if (Xstate_ApxSupported == -1) + { + int cpuidInfo[4]; + + const int CPUID_EAX = 0; + const int CPUID_EBX = 1; + const int CPUID_ECX = 2; + const int CPUID_EDX = 3; + +#ifdef _DEBUG + // We should only be calling this function if we know the extended feature exists + __cpuid(cpuidInfo, 0x00000000); + _ASSERTE(static_cast<uint32_t>(cpuidInfo[CPUID_EAX]) >= 0x0D); +#endif // _DEBUG + + __cpuidex(cpuidInfo, 0x0000000D, 0x00000000); + + if ((cpuidInfo[CPUID_EAX] & XSTATE_MASK_APX) == XSTATE_MASK_APX) + { + // CPUID leaf 0x0D, sub-leaf 0 advertises the APX state component (bit 19) in EAX. + // That only tells us the extended state can be managed through XSAVE; it does not + // by itself prove that the APX instructions are implemented. Additionally require + // the APX_F feature flag, reported in CPUID leaf 0x07, sub-leaf 1, EDX bit 21, and 
only enable + // EGPR context save/restore when both the state component and the feature flag exist. + + __cpuidex(cpuidInfo, 0x00000007, 0x00000001); + + const int requiredApxFlags = (1 << 21); + + if ((cpuidInfo[CPUID_EDX] & requiredApxFlags) == requiredApxFlags) + { + Xstate_ApxSupported = 1; + } + } + + if (Xstate_ApxSupported == -1) + { + Xstate_ApxSupported = 0; + } + } + + return Xstate_ApxSupported == 1; +#endif +} #endif // XSTATE_SUPPORTED || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) #if !HAVE_MACH_EXCEPTIONS @@ -809,6 +873,18 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) dest = FPREG_Xstate_Hi16Zmm(native, &size); _ASSERT(size == (sizeof(M512) * 16)); memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); + +#ifndef TARGET_OSX + // TODO-xarch-apx: I suppose OSX will not support APX. + if (FPREG_HasApxRegisters(native)) + { + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX); + + dest = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(dest, sizeof(DWORD64) * 16, &lpContext->Egpr16, sizeof(DWORD64) * 16); + } +#endif // !TARGET_OSX } } #elif defined(HOST_ARM64) @@ -1157,6 +1233,15 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } + + if (FPREG_HasApxRegisters(native)) + { + src = FPREG_Xstate_Egpr(native, &size); + _ASSERT(size == (sizeof(DWORD64) * 16)); + memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); + + lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; + } } #elif defined(HOST_ARM64) if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 524e1fd40b7ae..1d02b66cc7bd1 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -252,7 +252,7 @@
ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: // (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16) -#define SIZEOF__CONTEXT (3104) +#define SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: // (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index f8ecdcba72417..a16925e7de927 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -71,6 +71,9 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #define IS_VALID_WRITE_PTR(addr, size) _ASSERTE((addr) != NULL) #define IS_VALID_CODE_PTR(addr) _ASSERTE((addr) != NULL) +#if defined(TARGET_AMD64) +#define XSTATE_MASK_APX (0x80000) +#endif // TARGET_AMD64 void ThreadSuspend::SetSuspendRuntimeInProgress() { @@ -1960,7 +1963,7 @@ CONTEXT* AllocateOSContextHelper(BYTE** contextBuffer) DWORD context = CONTEXT_COMPLETE; #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; const ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_MPX | xStateFeatureMask; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; @@ -2918,7 +2921,7 @@ BOOL Thread::RedirectThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt) // The system silently ignores any feature specified in the FeatureMask // which is not enabled on the processor. 
#if defined(TARGET_X86) || defined(TARGET_AMD64) - SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512); + SetXStateFeaturesMask(pCtx, XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX); #elif defined(TARGET_ARM64) if (g_pfnSetXStateFeaturesMask != NULL) { @@ -3069,7 +3072,7 @@ BOOL Thread::RedirectCurrentThreadAtHandledJITCase(PFN_REDIRECTTARGET pTgt, CONT if (srcFeatures != 0) { #if defined(TARGET_X86) || defined(TARGET_AMD64) - const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512; + const DWORD64 xStateFeatureMask = XSTATE_MASK_AVX | XSTATE_MASK_AVX512 | XSTATE_MASK_APX; #elif defined(TARGET_ARM64) const DWORD64 xStateFeatureMask = XSTATE_MASK_ARM64_SVE; #endif From b998cdfc95f732bd7efe923f2fb988b9a4a48a83 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 14:28:33 -0700 Subject: [PATCH 04/14] hand-written CPUID check part --- .../Compiler/HardwareIntrinsicHelpers.cs | 5 ++ src/coreclr/vm/codeman.cpp | 6 ++ src/native/minipal/cpufeatures.c | 57 +++++++++++++++++++ src/native/minipal/cpufeatures.h | 1 + 4 files changed, 69 insertions(+) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index d00bca1109b20..e931aff6a96a4 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -78,6 +78,7 @@ private static class XArchIntrinsicConstants public const int Serialize = 0x20000; public const int Avx10v1 = 0x40000; public const int Evex = 0x80000; + public const int Apx = 0x100000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -135,6 +136,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v1_v512"); if ((flags & Evex) != 0) builder.AddSupportedInstructionSet("evex"); + if ((flags & Apx) != 0) + 
builder.AddSupportedInstructionSet("apx"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -204,6 +207,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), InstructionSet.X64_EVEX => Evex, InstructionSet.X64_EVEX_X64 => Evex, + InstructionSet.X64_APX => Apx, + InstructionSet.X64_APX_X64 => Apx, // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index d019affc33b33..c639886bd2c3f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1440,6 +1440,12 @@ void EEJitManager::SetCpuInfo() } } } + #if defined(TARGET_AMD64) + if ((cpuFeatures & XArchIntrinsicConstants_Apx) != 0) + { + CPUCompileFlags.Set(InstructionSet_APX); + } + #endif // TARGET_AMD64 #elif defined(TARGET_ARM64) #if !defined(TARGET_WINDOWS) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d6a063ce4d2f..6311977fcb6fd 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -72,6 +72,10 @@ static uint32_t xmmYmmStateSupport() #define XSTATE_MASK_AVX512 (0xE0) /* 0b1110_0000 */ #endif // XSTATE_MASK_AVX512 +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + static uint32_t avx512StateSupport() { #if defined(HOST_APPLE) @@ -99,6 +103,23 @@ static uint32_t avx512StateSupport() #endif } +static uint32_t apxStateSupport() +{ +#if defined(HOST_APPLE) + return false; +#elif defined(TARGET_X86) + return false; +#else + uint32_t eax; + __asm(" xgetbv\n" \ + : "=a"(eax) /*output in eax*/\ + : "c"(0) /*inputs - 0 in ecx*/\ + : "edx" /* registers that are clobbered*/ + ); + return ((eax & 0x80000) == 0x80000) ? 
1 : 0; +#endif // TARGET_AMD64 +} + static bool IsAvxEnabled() { return true; @@ -108,6 +129,15 @@ static bool IsAvx512Enabled() { return true; } + +static bool IsApxEnabled() +{ +#if defined(TARGET_X86) + return false; +#else + return true; +#endif // TARGET_AMD64 +} #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_UNIX @@ -125,6 +155,19 @@ static uint32_t avx512StateSupport() return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 1 : 0; } +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + +static uint32_t apxStateSupport() +{ +#if defined(TARGET_X86) + return false; +#else + return ((_xgetbv(0) & 0x80000) == 0x80000) ? 1 : 0; +#endif +} + static bool IsAvxEnabled() { DWORD64 FeatureMask = GetEnabledXStateFeatures(); @@ -137,6 +180,12 @@ static bool IsAvx512Enabled() return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } +static bool IsApxEnabled() +{ + DWORD64 FeatureMask = GetEnabledXStateFeatures(); + return ((FeatureMask & XSTATE_MASK_APX) != 0); +} + #endif // defined(HOST_X86) || defined(HOST_AMD64) #endif // HOST_WINDOWS @@ -252,6 +301,14 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_AvxVnni; } + if (IsApxEnabled() && apxStateSupport()) + { + if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Apx + { + result |= XArchIntrinsicConstants_Apx; + } + } + if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 { __cpuidex(cpuidInfo, 0x00000024, 0x00000000); diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 6422fe33f9787..ef56c3baa95ba 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -31,6 +31,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Serialize = 0x20000, XArchIntrinsicConstants_Avx10v1 = 0x40000, XArchIntrinsicConstants_Evex = 0x80000, + XArchIntrinsicConstants_Apx = 0x100000, }; #endif // HOST_X86 || HOST_AMD64 From 3446e2847bb1071ddfcd4c7d3bb0102445c9aee8 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 
2024 14:30:40 -0700 Subject: [PATCH 05/14] fix --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 2 +- src/native/minipal/cpufeatures.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 9e7999a7bc9e4..dc0208a61a429 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -120,7 +120,7 @@ class AsmOffsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xca0; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIX diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 6311977fcb6fd..fd3dd2d005a8a 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -303,7 +303,7 @@ int minipal_getcpufeatures(void) if (IsApxEnabled() && apxStateSupport()) { - if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Apx + if ((cpuidInfo[CPUID_EDX] & (1 << 21)) != 0) // Apx { result |= XArchIntrinsicConstants_Apx; } From 9bc008a8db2cfae56e9a7010488da2590b589f65 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 15:32:57 -0700 Subject: [PATCH 06/14] Fix merge error. 
--- src/coreclr/pal/src/thread/context.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 13ae3d037fac7..cdd0af0ac6087 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -317,7 +317,6 @@ typedef int __ptrace_request; #ifndef XSTATE_APX #define XSTATE_APX (19) #endif // XSTATE_APX -#endif // HOST_AMD64 #ifndef XSTATE_MASK_APX #define XSTATE_MASK_APX (1 << XSTATE_APX) @@ -1240,7 +1239,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex _ASSERT(size == (sizeof(DWORD64) * 16)); memcpy_s(&lpContext->Egpr16, sizeof(DWORD64) * 16, src, sizeof(DWORD64) * 16); - lpContext->XStateFeaturesMask |= XSATE_MASK_APX; + lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; } } #elif defined(HOST_ARM64) From 44163d75a6ac84f1a88925ccb861e7b0cd1dcbce Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 28 Aug 2024 16:50:46 -0700 Subject: [PATCH 07/14] bug fixes --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 6 +++--- src/coreclr/pal/inc/pal.h | 2 ++ src/coreclr/pal/src/thread/context.cpp | 3 ++- src/coreclr/vm/amd64/asmconstants.h | 2 +- src/coreclr/vm/threadsuspend.cpp | 3 ++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index dc0208a61a429..9ee9683c47e74 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -68,9 +68,9 @@ class AsmOffsets // Release build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1a80; - public const int OFFSETOF__REGDISPLAY__SP = 0x1a70; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a78; + 
public const int SIZEOF__REGDISPLAY = 0x1b80; + public const int OFFSETOF__REGDISPLAY__SP = 0x1b70; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b78; #else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0xbf0; public const int OFFSETOF__REGDISPLAY__SP = 0xbd0; diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 209e71420e6e1..f12ddf8b610d8 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1384,12 +1384,14 @@ typedef struct _KNONVOLATILE_CONTEXT_POINTERS { #define XSTATE_AVX512_KMASK (5) #define XSTATE_AVX512_ZMM_H (6) #define XSTATE_AVX512_ZMM (7) +#define XSTATE_APX (19) #define XSTATE_MASK_GSSE (UI64(1) << (XSTATE_GSSE)) #define XSTATE_MASK_AVX (XSTATE_MASK_GSSE) #define XSTATE_MASK_AVX512 ((UI64(1) << (XSTATE_AVX512_KMASK)) | \ (UI64(1) << (XSTATE_AVX512_ZMM_H)) | \ (UI64(1) << (XSTATE_AVX512_ZMM))) +#define XSTATE_MASK_APX (UI64(1) << (XSTATE_APX)) typedef struct DECLSPEC_ALIGN(16) _M128A { ULONGLONG Low; diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index cdd0af0ac6087..4a53e19bf7ccb 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -1232,7 +1232,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } - +#if !defined(TARGET_OSX) if (FPREG_HasApxRegisters(native)) { src = FPREG_Xstate_Egpr(native, &size); @@ -1241,6 +1241,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_APX; } +#endif // TARGET_OSX } #elif defined(HOST_ARM64) if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 1d02b66cc7bd1..12dde168e607e 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -251,7 +251,7 @@ 
ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16) +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16 + /*XSTATE_APX*/ 8*16) #define SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index a16925e7de927..919bbbabc1374 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -71,7 +71,8 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #define IS_VALID_WRITE_PTR(addr, size) _ASSERTE((addr) != NULL) #define IS_VALID_CODE_PTR(addr) _ASSERTE((addr) != NULL) -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_X86) +// Although APX is only for X64, we can still define it under x86 to keep the original code structure. 
#define XSTATE_MASK_APX (0x80000) #endif // TARGET_AMD64 From 600abd02601e9d6623767f5a058d583d4df2652f Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Thu, 29 Aug 2024 10:32:34 -0700 Subject: [PATCH 08/14] Bug fix --- src/coreclr/pal/src/thread/context.cpp | 2 +- src/coreclr/vm/threadsuspend.cpp | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 4a53e19bf7ccb..aa97ff79103d8 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -877,7 +877,7 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) // TODO-xarch-apx: I suppose OSX will not support APX. if (FPREG_HasApxRegisters(native)) { - _ASSERT((lpContext->XStateFeaturesMask & XSATE_MASK_APX) == XSATE_MASK_APX); + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX); dest = FPREG_Xstate_Egpr(native, &size); _ASSERT(size == (sizeof(DWORD64) * 16)); diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 919bbbabc1374..289b361a9d210 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -72,9 +72,15 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #define IS_VALID_CODE_PTR(addr) _ASSERTE((addr) != NULL) #if defined(TARGET_AMD64) || defined(TARGET_X86) -// Although APX is only for X64, we can still define it under x86 to keep the original code structure. -#define XSTATE_MASK_APX (0x80000) -#endif // TARGET_AMD64 +// These values should be picked up from winrt.h, defining them in case they are missing there. 
+#ifndef XSTATE_APX +#define XSTATE_APX (19) +#endif // XSTATE_APX + +#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (1 << XSTATE_APX) +#endif // XSTATE_MASK_APX +#endif // TARGET_AMD64 || TARGET_X86 void ThreadSuspend::SetSuspendRuntimeInProgress() { From 81b47c6e3dc39ac902c0272e674eb7eac87beb43 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Thu, 29 Aug 2024 11:11:12 -0700 Subject: [PATCH 09/14] bug fix --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 9ee9683c47e74..02908d68a7637 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -16,9 +16,9 @@ class AsmOffsets // Debug build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1a90; - public const int OFFSETOF__REGDISPLAY__SP = 0x1a78; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a80; + public const int SIZEOF__REGDISPLAY = 0x1b90; + public const int OFFSETOF__REGDISPLAY__SP = 0x1b78; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80; #else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0xbf0; public const int OFFSETOF__REGDISPLAY__SP = 0xbd8; From d516febb79ecbbb9edc14226f206130d44dedd11 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 3 Sep 2024 10:48:59 -0700 Subject: [PATCH 10/14] resolve commnets. 
--- src/coreclr/pal/src/arch/amd64/context2.S | 5 +---- src/coreclr/pal/src/thread/context.cpp | 6 +++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index ad7e9b35ad528..b4931a84e099b 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -183,10 +183,7 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): kmovq k6, qword ptr [rdi + (CONTEXT_KMask0 + 6 * 8)] kmovq k7, qword ptr [rdi + (CONTEXT_KMask0 + 7 * 8)] - // TODO-xarch-apx: the definition of XSTATE mask value for APX is now missing on the OS level, - // we are currently using bare value to hack it through the build process, and test the implementation through CI. - // those changes will be removed when we have the OS support for APX. - test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], 524288 + test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], XSTATE_MASK_APX je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index aa97ff79103d8..bfd7c7502b3cc 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -2188,10 +2188,14 @@ CONTEXT& CONTEXT::operator=(const CONTEXT& ctx) size_t copySize; if (ctx.ContextFlags & CONTEXT_XSTATE & CONTEXT_AREA_MASK) { - if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) + if ((ctx.XStateFeaturesMask & XSTATE_MASK_APX) == XSTATE_MASK_APX) { copySize = sizeof(CONTEXT); } + else if ((ctx.XStateFeaturesMask & XSTATE_MASK_AVX512) == XSTATE_MASK_AVX512) + { + copySize = offsetof(CONTEXT, Egpr16); + } else { copySize = offsetof(CONTEXT, KMask0); From 79b23c8a169a2dc5e165aca428f9c4689b5152d6 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Mon, 4 Nov 2024 15:43:59 -0800 Subject: [PATCH 11/14] re-generate the ISA changes to propagate the changes 
in ThunkGenerator. --- src/coreclr/inc/corinfoinstructionset.h | 112 +++++++++--------- src/coreclr/inc/jiteeversionguid.h | 10 +- .../JitInterface/CorInfoInstructionSet.cs | 112 +++++++++--------- 3 files changed, 121 insertions(+), 113 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 59ef31ccf6e58..f3111afaa836b 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -81,33 +81,35 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + 
InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512CD_X64=62, + InstructionSet_AVX512DQ_X64=63, + InstructionSet_AVX512VBMI_X64=64, + InstructionSet_AVX10v1_X64=65, + InstructionSet_AVX10v1_V512_X64=66, + InstructionSet_APX_X64=67, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -148,33 +150,35 @@ enum CORINFO_InstructionSet InstructionSet_VectorT128=36, InstructionSet_VectorT256=37, InstructionSet_VectorT512=38, - InstructionSet_X86Base_X64=39, - InstructionSet_SSE_X64=40, - InstructionSet_SSE2_X64=41, - InstructionSet_SSE3_X64=42, - InstructionSet_SSSE3_X64=43, - InstructionSet_SSE41_X64=44, - InstructionSet_SSE42_X64=45, - InstructionSet_AVX_X64=46, - InstructionSet_AVX2_X64=47, - InstructionSet_AES_X64=48, - InstructionSet_BMI1_X64=49, - InstructionSet_BMI2_X64=50, - InstructionSet_FMA_X64=51, - InstructionSet_LZCNT_X64=52, - InstructionSet_PCLMULQDQ_X64=53, - InstructionSet_POPCNT_X64=54, - InstructionSet_AVXVNNI_X64=55, - InstructionSet_MOVBE_X64=56, - InstructionSet_X86Serialize_X64=57, - InstructionSet_EVEX_X64=58, - InstructionSet_AVX512F_X64=59, - InstructionSet_AVX512BW_X64=60, - InstructionSet_AVX512CD_X64=61, - InstructionSet_AVX512DQ_X64=62, - InstructionSet_AVX512VBMI_X64=63, - InstructionSet_AVX10v1_X64=64, - InstructionSet_AVX10v1_V512_X64=65, + InstructionSet_APX=39, + InstructionSet_X86Base_X64=40, + InstructionSet_SSE_X64=41, + InstructionSet_SSE2_X64=42, + InstructionSet_SSE3_X64=43, + InstructionSet_SSSE3_X64=44, + InstructionSet_SSE41_X64=45, + InstructionSet_SSE42_X64=46, + InstructionSet_AVX_X64=47, + InstructionSet_AVX2_X64=48, + InstructionSet_AES_X64=49, + InstructionSet_BMI1_X64=50, + InstructionSet_BMI2_X64=51, + InstructionSet_FMA_X64=52, + InstructionSet_LZCNT_X64=53, + InstructionSet_PCLMULQDQ_X64=54, + InstructionSet_POPCNT_X64=55, + 
InstructionSet_AVXVNNI_X64=56, + InstructionSet_MOVBE_X64=57, + InstructionSet_X86Serialize_X64=58, + InstructionSet_EVEX_X64=59, + InstructionSet_AVX512F_X64=60, + InstructionSet_AVX512BW_X64=61, + InstructionSet_AVX512CD_X64=62, + InstructionSet_AVX512DQ_X64=63, + InstructionSet_AVX512VBMI_X64=64, + InstructionSet_AVX10v1_X64=65, + InstructionSet_AVX10v1_V512_X64=66, + InstructionSet_APX_X64=67, #endif // TARGET_X86 }; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index b307e4659215e..7a6479c81e5ae 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* c3886dcb-d533-44b8-86c0-ff79ac4ce9df */ - 0xc3886dcb, - 0xd533, - 0x44b8, - {0x86, 0xc0, 0xff, 0x79, 0xac, 0x4c, 0xe9, 0xdf} +constexpr GUID JITEEVersionIdentifier = { /* 381fc250-b8f3-4cee-834e-b0bc682a09f2 */ + 0x381fc250, + 0xb8f3, + 0x4cee, + {0x83, 0x4e, 0xb0, 0xbc, 0x68, 0x2a, 0x09, 0xf2} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index aa56167e43da4..38c4d0835ad2e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -249,33 +249,35 @@ public enum InstructionSet_X64 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - 
AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512BW_X64 = 61, + AVX512CD_X64 = 62, + AVX512DQ_X64 = 63, + AVX512VBMI_X64 = 64, + AVX10v1_X64 = 65, + AVX10v1_V512_X64 = 66, + APX_X64 = 67, } public enum InstructionSet_X86 @@ -320,33 +322,35 @@ public enum InstructionSet_X86 VectorT128 = 36, VectorT256 = 37, VectorT512 = 38, - X86Base_X64 = 39, - SSE_X64 = 40, - SSE2_X64 = 41, - SSE3_X64 = 42, - SSSE3_X64 = 43, - SSE41_X64 = 44, - SSE42_X64 = 45, - AVX_X64 = 46, - AVX2_X64 = 47, - AES_X64 = 48, - BMI1_X64 = 49, - BMI2_X64 = 50, - FMA_X64 = 51, - LZCNT_X64 = 52, - PCLMULQDQ_X64 = 53, - POPCNT_X64 = 54, - AVXVNNI_X64 = 55, - MOVBE_X64 = 56, - X86Serialize_X64 = 57, - EVEX_X64 = 58, - AVX512F_X64 = 59, - AVX512BW_X64 = 60, - AVX512CD_X64 = 61, - AVX512DQ_X64 = 62, - AVX512VBMI_X64 = 63, - AVX10v1_X64 = 64, - AVX10v1_V512_X64 = 65, + APX = 39, + X86Base_X64 = 40, + SSE_X64 = 41, + SSE2_X64 = 42, + SSE3_X64 = 43, + SSSE3_X64 = 44, + SSE41_X64 = 45, + SSE42_X64 = 46, + AVX_X64 = 47, + AVX2_X64 = 48, + AES_X64 = 49, + BMI1_X64 = 50, + BMI2_X64 = 51, + FMA_X64 = 52, + LZCNT_X64 = 53, + PCLMULQDQ_X64 = 54, + POPCNT_X64 = 55, + AVXVNNI_X64 = 56, + MOVBE_X64 = 57, + X86Serialize_X64 = 58, + EVEX_X64 = 59, + AVX512F_X64 = 60, + AVX512BW_X64 = 61, + AVX512CD_X64 = 62, + AVX512DQ_X64 = 63, + AVX512VBMI_X64 = 64, + AVX10v1_X64 = 65, + AVX10v1_V512_X64 = 66, + APX_X64 = 67, } public unsafe struct InstructionSetFlags : IEnumerable From 8e1ba4a9a0233a9c1723d8bd4a9b483c5b2d2b7d Mon Sep 17 00:00:00 2001 From: 
Ruihan-Yin Date: Fri, 8 Nov 2024 10:21:49 -0800 Subject: [PATCH 12/14] resolve comments --- docs/design/features/xarch-apx.md | 3 --- src/coreclr/vm/threadsuspend.cpp | 2 +- src/native/minipal/cpufeatures.c | 6 +++--- 3 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 docs/design/features/xarch-apx.md diff --git a/docs/design/features/xarch-apx.md b/docs/design/features/xarch-apx.md deleted file mode 100644 index 8997da56fbdb8..0000000000000 --- a/docs/design/features/xarch-apx.md +++ /dev/null @@ -1,3 +0,0 @@ -# APX Integration in .NET - -Let's keep documentation on APX integration and notes on things here. I will evolve this as necessary. \ No newline at end of file diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 811305424c524..9b363c7876a37 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -72,7 +72,7 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #define IS_VALID_CODE_PTR(addr) _ASSERTE((addr) != NULL) #if defined(TARGET_AMD64) || defined(TARGET_X86) -// These values should be picked up from winrt.h, defining them in case they are missing there. +// These values should be picked up from winnt.h, defining them in case they are missing there. #ifndef XSTATE_APX #define XSTATE_APX (19) #endif // XSTATE_APX diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index fd3dd2d005a8a..43fc0fa5742e5 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -106,9 +106,9 @@ static uint32_t avx512StateSupport() static uint32_t apxStateSupport() { #if defined(HOST_APPLE) - return false; + return 0; #elif defined(TARGET_X86) - return false; + return 0; #else uint32_t eax; __asm(" xgetbv\n" \ @@ -162,7 +162,7 @@ static uint32_t avx512StateSupport() static uint32_t apxStateSupport() { #if defined(TARGET_X86) - return false; + return 0; #else return ((_xgetbv(0) & 0x80000) == 0x80000) ? 
1 : 0; #endif From ea8949f6ce08f024b23eaca157e7a140e0373ffd Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Fri, 8 Nov 2024 10:33:29 -0800 Subject: [PATCH 13/14] use byte code for EGPR XSAVE logics. --- src/coreclr/pal/src/arch/amd64/context2.S | 52 ++++++++++++++++------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index b4931a84e099b..54ab6ec037ab2 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -186,22 +186,42 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): test BYTE PTR [rdi + CONTEXT_XStateFeaturesMask], XSTATE_MASK_APX je LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE) - mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] - mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] - mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] - mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] - mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] - mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] - mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] - mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] - mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] - mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] - mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] - mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] - mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] - mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] - mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] - mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] + // TODO-XArch-APX: + // we are using raw hex code here to emit EGPRs-related changes, + // we will need to come back and re-write this part when assembler supports EGPRs. 
+ + // mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] + .byte 0xd5, 0x48, 0x8b, 0x87, 0xa0, 0x08, 0x00, 0x00 + // mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] + .byte 0xd5, 0x48, 0x8b, 0x8f, 0xa8, 0x08, 0x00, 0x00 + // mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] + .byte 0xd5, 0x48, 0x8b, 0x97, 0xb0, 0x08, 0x00, 0x00 + // mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] + .byte 0xd5, 0x48, 0x8b, 0x9f, 0xb8, 0x08, 0x00, 0x00 + // mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] + .byte 0xd5, 0x48, 0x8b, 0xa7, 0xc0, 0x08, 0x00, 0x00 + // mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] + .byte 0xd5, 0x48, 0x8b, 0xaf, 0xc8, 0x08, 0x00, 0x00 + // mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] + .byte 0xd5, 0x48, 0x8b, 0xb7, 0xd0, 0x08, 0x00, 0x00 + // mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] + .byte 0xd5, 0x48, 0x8b, 0xbf, 0xd8, 0x08, 0x00, 0x00 + // mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x87, 0xe0, 0x08, 0x00, 0x00 + // mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x8f, 0xe8, 0x08, 0x00, 0x00 + // mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x97, 0xf0, 0x08, 0x00, 0x00 + // mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] + .byte 0xd5, 0x4c, 0x8b, 0x9f, 0xf8, 0x08, 0x00, 0x00 + // mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xa7, 0x00, 0x09, 0x00, 0x00 + // mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xaf, 0x08, 0x09, 0x00, 0x00 + // mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xb7, 0x10, 0x09, 0x00, 0x00 + // mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] + .byte 0xd5, 0x4c, 0x8b, 0xbf, 0x18, 0x09, 0x00, 0x00 LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): From 597e797248b3f66716d586650f9d421ce35cd228 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Fri, 8 Nov 2024 14:36:48 -0800 Subject: [PATCH 14/14] resolve comments. 
--- src/coreclr/pal/src/arch/amd64/asmconstants.h | 4 +-- src/coreclr/pal/src/arch/amd64/context2.S | 32 +++++++++---------- src/coreclr/pal/src/thread/context.cpp | 10 ------ .../ThunkGenerator/InstructionSetDesc.txt | 2 +- src/coreclr/vm/amd64/asmconstants.h | 9 +++++- src/native/minipal/cpufeatures.c | 14 +++++--- 6 files changed, 37 insertions(+), 34 deletions(-) diff --git a/src/coreclr/pal/src/arch/amd64/asmconstants.h b/src/coreclr/pal/src/arch/amd64/asmconstants.h index ed63a88e0e817..8e97efdbf6882 100644 --- a/src/coreclr/pal/src/arch/amd64/asmconstants.h +++ b/src/coreclr/pal/src/arch/amd64/asmconstants.h @@ -93,8 +93,8 @@ #define CONTEXT_KMask0 CONTEXT_Ymm0H+(16*16) #define CONTEXT_Zmm0H CONTEXT_KMask0+(8*8) #define CONTEXT_Zmm16 CONTEXT_Zmm0H+(32*16) -#define CONTEXT_Egpr CONTEXT_Zmm16+(16*8) -#define CONTEXT_Size CONTEXT_Egpr+(64*16) +#define CONTEXT_Egpr CONTEXT_Zmm16+(64*16) +#define CONTEXT_Size CONTEXT_Egpr+(8*16) #else // HOST_64BIT diff --git a/src/coreclr/pal/src/arch/amd64/context2.S b/src/coreclr/pal/src/arch/amd64/context2.S index 54ab6ec037ab2..2b183798f0067 100644 --- a/src/coreclr/pal/src/arch/amd64/context2.S +++ b/src/coreclr/pal/src/arch/amd64/context2.S @@ -191,37 +191,37 @@ LOCAL_LABEL(Done_Restore_CONTEXT_FLOATING_POINT): // we will need to come back and re-write this part when assembler supports EGPRs. 
// mov r16, qword ptr [rdi + CONTEXT_Egpr + 0 * 8] - .byte 0xd5, 0x48, 0x8b, 0x87, 0xa0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0x87, 0x20, 0x0c, 0x00, 0x00 // mov r17, qword ptr [rdi + CONTEXT_Egpr + 1 * 8] - .byte 0xd5, 0x48, 0x8b, 0x8f, 0xa8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0x8f, 0x28, 0x0c, 0x00, 0x00 // mov r18, qword ptr [rdi + CONTEXT_Egpr + 2 * 8] - .byte 0xd5, 0x48, 0x8b, 0x97, 0xb0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0x97, 0x30, 0x0c, 0x00, 0x00 // mov r19, qword ptr [rdi + CONTEXT_Egpr + 3 * 8] - .byte 0xd5, 0x48, 0x8b, 0x9f, 0xb8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0x9f, 0x38, 0x0c, 0x00, 0x00 // mov r20, qword ptr [rdi + CONTEXT_Egpr + 4 * 8] - .byte 0xd5, 0x48, 0x8b, 0xa7, 0xc0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0xa7, 0x40, 0x0c, 0x00, 0x00 // mov r21, qword ptr [rdi + CONTEXT_Egpr + 5 * 8] - .byte 0xd5, 0x48, 0x8b, 0xaf, 0xc8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0xaf, 0x48, 0x0c, 0x00, 0x00 // mov r22, qword ptr [rdi + CONTEXT_Egpr + 6 * 8] - .byte 0xd5, 0x48, 0x8b, 0xb7, 0xd0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0xb7, 0x50, 0x0c, 0x00, 0x00 // mov r23, qword ptr [rdi + CONTEXT_Egpr + 7 * 8] - .byte 0xd5, 0x48, 0x8b, 0xbf, 0xd8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x48, 0x8b, 0xbf, 0x58, 0x0c, 0x00, 0x00 // mov r24, qword ptr [rdi + CONTEXT_Egpr + 8 * 8] - .byte 0xd5, 0x4c, 0x8b, 0x87, 0xe0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0x87, 0x60, 0x0c, 0x00, 0x00 // mov r25, qword ptr [rdi + CONTEXT_Egpr + 9 * 8] - .byte 0xd5, 0x4c, 0x8b, 0x8f, 0xe8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0x8f, 0x68, 0x0c, 0x00, 0x00 // mov r26, qword ptr [rdi + CONTEXT_Egpr + 10 * 8] - .byte 0xd5, 0x4c, 0x8b, 0x97, 0xf0, 0x08, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0x97, 0x70, 0x0c, 0x00, 0x00 // mov r27, qword ptr [rdi + CONTEXT_Egpr + 11 * 8] - .byte 0xd5, 0x4c, 0x8b, 0x9f, 0xf8, 0x08, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0x9f, 0x78, 0x0c, 0x00, 0x00 // mov r28, qword ptr [rdi + CONTEXT_Egpr + 12 * 8] - .byte 0xd5, 
0x4c, 0x8b, 0xa7, 0x00, 0x09, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0xa7, 0x80, 0x0c, 0x00, 0x00 // mov r29, qword ptr [rdi + CONTEXT_Egpr + 13 * 8] - .byte 0xd5, 0x4c, 0x8b, 0xaf, 0x08, 0x09, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0xaf, 0x88, 0x0c, 0x00, 0x00 // mov r30, qword ptr [rdi + CONTEXT_Egpr + 14 * 8] - .byte 0xd5, 0x4c, 0x8b, 0xb7, 0x10, 0x09, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0xb7, 0x90, 0x0c, 0x00, 0x00 // mov r31, qword ptr [rdi + CONTEXT_Egpr + 15 * 8] - .byte 0xd5, 0x4c, 0x8b, 0xbf, 0x18, 0x09, 0x00, 0x00 + .byte 0xd5, 0x4c, 0x8b, 0xbf, 0x98, 0x0c, 0x00, 0x00 LOCAL_LABEL(Done_Restore_CONTEXT_XSTATE): diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 9c50ec5c6b03c..9dac4b2f36635 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -313,16 +313,6 @@ typedef int __ptrace_request; ASSIGN_CONTROL_REGS \ ASSIGN_INTEGER_REGS \ -#if defined(HOST_AMD64) && defined(XSTATE_SUPPORTED) -#ifndef XSTATE_APX -#define XSTATE_APX (19) -#endif // XSTATE_APX - -#ifndef XSTATE_MASK_APX -#define XSTATE_MASK_APX (1 << XSTATE_APX) -#endif // XSTATE_MASK_APX -#endif // HOST_AMD64 && XSTATE_SUPPORTED - #if defined(XSTATE_SUPPORTED) || defined(HOST_AMD64) && defined(HAVE_MACH_EXCEPTIONS) bool Xstate_IsAvx512Supported() { diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 5aaf654677579..1e0f59c7f6714 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -22,7 +22,7 @@ ; DO NOT CHANGE R2R NUMERIC VALUES OF THE EXISTING SETS. Changing R2R numeric values definitions would be R2R format breaking change. -; The ISA definiitons should also be mapped to `hwintrinsicIsaRangeArray` in hwintrinsic.cpp. 
+; The ISA definitions should also be mapped to `hwintrinsicIsaRangeArray` in hwintrinsic.cpp. ; Definition of X86 instruction sets definearch ,X86 ,32Bit ,X64, X64 diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index d17a7142ba97d..4b38aeeaca52d 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -238,7 +238,14 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__VASigCookie__pNDirectILStub #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) // Expression is too complicated, is currently: -// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + /*XSTATE*/ + 8 + 8 + /*XSTATE_AVX*/ 16*16 + /*XSTATE_AVX512_KMASK*/ 8*8 + /*XSTATE_AVX512_ZMM_H*/ 32*16 + /*XSTATE_AVX512_ZMM*/ 64*16 + /*XSTATE_APX*/ 8*16) +// (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + +// /*XMM_SAVE_AREA32*/(2*2 + 1*2 + 2 + 4 + 2*2 + 4 + 2*2 + 4*2 + 16*8 + 16*16 + 1*96) + 26*16 + 8 + 8*5 + +// /*XSTATE*/ + 8 + 8 + +// /*XSTATE_AVX*/ 16*16 + +// /*XSTATE_AVX512_KMASK*/ 8*8 + +// /*XSTATE_AVX512_ZMM_H*/ 32*16 + +// /*XSTATE_AVX512_ZMM*/ 64*16 + +// /*XSTATE_APX*/ 8*16) #define SIZEOF__CONTEXT (3232) #else // Expression is too complicated, is currently: diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 43fc0fa5742e5..1d1bbf9e9bc2a 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -155,10 +155,6 @@ static uint32_t avx512StateSupport() return ((_xgetbv(0) & 0xE6) == 0x0E6) ? 1 : 0; } -#ifndef XSTATE_MASK_APX -#define XSTATE_MASK_APX (0x80000) -#endif // XSTATE_MASK_APX - static uint32_t apxStateSupport() { #if defined(TARGET_X86) @@ -180,10 +176,20 @@ static bool IsAvx512Enabled() return ((FeatureMask & XSTATE_MASK_AVX512) != 0); } +// TODO-XArch-APX: +// we will eventually need to remove this macro when windows officially supports APX. 
+#ifndef XSTATE_MASK_APX +#define XSTATE_MASK_APX (0x80000) +#endif // XSTATE_MASK_APX + static bool IsApxEnabled() { +#ifdef TARGET_X86 + return false; +#else DWORD64 FeatureMask = GetEnabledXStateFeatures(); return ((FeatureMask & XSTATE_MASK_APX) != 0); +#endif } #endif // defined(HOST_X86) || defined(HOST_AMD64)