From 72a681f8686f0550f34967f70a5a6c3ab5aa807f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 19 Jun 2024 18:31:34 +0100 Subject: [PATCH 01/29] ARM64-SVE: Add SVE registers to pal context --- .../Runtime/ExceptionServices/AsmOffsets.cs | 8 +- src/coreclr/debug/inc/dbgtargetcontext.h | 25 +++- src/coreclr/pal/inc/pal.h | 24 ++- src/coreclr/pal/src/include/pal/context.h | 33 +---- src/coreclr/pal/src/thread/context.cpp | 139 +++++++++++++++++- src/coreclr/vm/arm64/asmconstants.h | 2 +- 6 files changed, 186 insertions(+), 45 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 7db188808e26a4..770cd170f0a0a2 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -25,9 +25,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0x940; - public const int OFFSETOF__REGDISPLAY__SP = 0x898; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; + public const int SIZEOF__REGDISPLAY = 0xde0; + public const int OFFSETOF__REGDISPLAY__SP = 0xd38; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0xd40; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; public const int OFFSETOF__REGDISPLAY__SP = 0x3ec; @@ -113,7 +113,7 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIx #elif TARGET_ARM64 - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x5e0; #elif TARGET_ARM public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0; #elif TARGET_X86 diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index e43cf5c3e5d3a8..5e3f3f974d831b 
100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -399,6 +399,12 @@ typedef struct { LONGLONG High; } DT_NEON128; +typedef struct { + ULONGLONG Low; + LONGLONG High; +} DT_SVE128; + + typedef DECLSPEC_ALIGN(16) struct { // // Control flags. @@ -458,15 +464,24 @@ typedef DECLSPEC_ALIGN(16) struct { /* +0x310 */ DWORD Fpcr; /* +0x314 */ DWORD Fpsr; + // + // Sve Registers + // + //TODO-SVE: How does this structure handle variable sized Z/P/FFR registers? + /* +0x318 */ DWORD Vl; + /* +0x32C */ DT_SVE128 Z[32]; + /* +0x? */ WORD P[32]; + /* +0x? */ WORD Ffr; + // // Debug registers // - /* +0x318 */ DWORD Bcr[DT_ARM64_MAX_BREAKPOINTS]; - /* +0x338 */ DWORD64 Bvr[DT_ARM64_MAX_BREAKPOINTS]; - /* +0x378 */ DWORD Wcr[DT_ARM64_MAX_WATCHPOINTS]; - /* +0x380 */ DWORD64 Wvr[DT_ARM64_MAX_WATCHPOINTS]; - /* +0x390 */ + /* +0x? */ DWORD Bcr[DT_ARM64_MAX_BREAKPOINTS]; + /* +0x? */ DWORD64 Bvr[DT_ARM64_MAX_BREAKPOINTS]; + /* +0x? */ DWORD Wcr[DT_ARM64_MAX_WATCHPOINTS]; + /* +0x? */ DWORD64 Wvr[DT_ARM64_MAX_WATCHPOINTS]; + /* +0x5e0 */ } DT_CONTEXT; diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 920c44f98cd4c1..1d43cb63471704 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1876,6 +1876,11 @@ typedef struct _NEON128 { LONGLONG High; } NEON128, *PNEON128; +typedef struct _SVE128 { + ULONGLONG Low; + LONGLONG High; +} SVE128, *PSVE128; + typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // @@ -1936,15 +1941,24 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* +0x310 */ DWORD Fpcr; /* +0x314 */ DWORD Fpsr; + // + // Sve Registers + // + //TODO-SVE: How does this structure handle variable sized Z/P/FFR registers? 
+ /* +0x318 */ DWORD Vl; + /* +0x32C */ SVE128 Z[32]; + /* +0x32C */ WORD P[32]; + /* +0x32C */ WORD Ffr; + // // Debug registers // - /* +0x318 */ DWORD Bcr[ARM64_MAX_BREAKPOINTS]; - /* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; - /* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; - /* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; - /* +0x390 */ + /* +0x? */ DWORD Bcr[ARM64_MAX_BREAKPOINTS]; + /* +0x? */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; + /* +0x? */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; + /* +0x? */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; + /* +0x? */ } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 6eeeaa6fed7453..7f3fe9113eddef 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -662,41 +662,18 @@ const struct fpregs* GetConstNativeSigSimdContext(const native_context_t *mc) #define MCREG_Pc(mc) ((mc).pc) #define MCREG_Cpsr(mc) ((mc).pstate) +void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ptr, sve_context **sve_ptr); inline -fpsimd_context* GetNativeSigSimdContext(native_context_t *mc) +void GetNativeSigSimdContext(native_context_t *mc, fpsimd_context **fp_ptr, sve_context **sve_ptr) { - size_t size = 0; - - do - { - fpsimd_context* fp = reinterpret_cast<fpsimd_context *>(&mc->uc_mcontext.__reserved[size]); - - if(fp->head.magic == FPSIMD_MAGIC) - { - _ASSERTE(fp->head.size >= sizeof(fpsimd_context)); - _ASSERTE(size + fp->head.size <= sizeof(mc->uc_mcontext.__reserved)); - - return fp; - } - - if (fp->head.size == 0) - { - break; - } - - size += fp->head.size; - } while (size + sizeof(fpsimd_context) <= sizeof(mc->uc_mcontext.__reserved)); - - _ASSERTE(false); - - return nullptr; + _GetNativeSigSimdContext(&mc->uc_mcontext.__reserved[0], sizeof(mc->uc_mcontext.__reserved), fp_ptr, sve_ptr); } inline -const fpsimd_context* GetConstNativeSigSimdContext(const native_context_t *mc) +void
GetConstNativeSigSimdContext(const native_context_t *mc, fpsimd_context const **fp_ptr, sve_context const **sve_ptr) { - return GetNativeSigSimdContext(const_cast<native_context_t*>(mc)); + GetNativeSigSimdContext(const_cast<native_context_t*>(mc), const_cast<fpsimd_context**>(fp_ptr), const_cast<sve_context**>(sve_ptr)); } #else // TARGET_OSX diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 04fabab0e7253e..7056eb049ac978 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -708,7 +708,9 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } } #else // TARGET_OSX - fpsimd_context* fp = GetNativeSigSimdContext(native); + fpsimd_context* fp = nullptr; + sve_context* sve = nullptr; + GetNativeSigSimdContext(native, &fp, &sve); if (fp) { fp->fpsr = lpContext->Fpsr; @@ -718,6 +720,25 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) *(NEON128*) &fp->vregs[i] = lpContext->V[i]; } } + if (sve) + { + //TODO-SVE: This only handles vector lengths of 128bits. + + uint16_t vq = sve_vq_from_vl(lpContext->Vl); + + sve->vl = lpContext->Vl; + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + + for (int i = 0; i < 32; i++) + { + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + *(SVE128*) (((uint8_t*)sve) + SVE_SIG_ZREG_OFFSET(vq, i)) = lpContext->Z[i]; + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes.
+ *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + } + } #endif // TARGET_OSX #elif defined(HOST_ARM) VfpSigFrame* fp = GetNativeSigSimdContext(native); @@ -805,6 +826,99 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) #endif //HOST_AMD64 && XSTATE_SUPPORTED } +#if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) +/*++ +Function : + _GetNativeSigSimdContext + + Finds the FP and SVE context from the reserved data section of a native context. + +Parameters : + uint8_t *data : native context reserved data. + uint32_t size : size of the reserved data. + fpsimd_context **fp_ptr : returns a pointer to the FP context. + sve_context **sve_ptr : returns a pointer to the SVE context. + +Return value : + None. + +--*/ +void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ptr, sve_context **sve_ptr) +{ + size_t position = 0; + fpsimd_context *fp = nullptr; + sve_context *sve = nullptr; + extra_context *extra = nullptr; + bool done = false; + + while (!done) + { + _aarch64_ctx *ctx = reinterpret_cast<_aarch64_ctx *>(&data[position]); + + _ASSERTE(position + ctx->size <= size); + + + switch (ctx->magic) + { + case FPSIMD_MAGIC: + _ASSERTE(fp == nullptr); + _ASSERTE(ctx->size >= sizeof(fpsimd_context)); + fp = reinterpret_cast<fpsimd_context *>(&data[position]); + break; + + case SVE_MAGIC: + _ASSERTE(sve == nullptr); + _ASSERTE(ctx->size >= sizeof(sve_context)); + sve = reinterpret_cast<sve_context *>(&data[position]); + break; + + case EXTRA_MAGIC: + { + // Points to an additional section of reserved data. + _ASSERTE(extra == nullptr); + _ASSERTE(ctx->size >= sizeof(extra_context)); + fpsimd_context *fpOrig = fp; + sve_context *sveOrig = sve; + + extra = reinterpret_cast<extra_context *>(&data[position]); + _GetNativeSigSimdContext((uint8_t*)extra->datap, extra->size, &fp, &sve); + + // There should only be one block of each type.
+ _ASSERTE(fpOrig == nullptr || fp == fpOrig); + _ASSERTE(sveOrig == nullptr || sve == sveOrig); + break; + } + + case 0: + _ASSERTE(ctx->size == 0); + done = true; + break; + + default: + // Any other section. + _ASSERTE(ctx->size != 0); + break; + } + + position += ctx->size; + } + + if (fp) + { + *fp_ptr = fp; + } + if (sve) + { + // If this ever fires then we have an SVE context but no FP context. Given that V and Z + // registers overlap, then when propagating this data to other structures, the SVE + // context should be used to fill the FP data. + _ASSERTE(fp != nullptr); + + *sve_ptr = sve; + } +} +#endif // HOST_64BIT && HOST_ARM64 && !TARGET_FREEBSD && !TARGET_OSX + /*++ Function : CONTEXTFromNativeContext @@ -917,7 +1031,9 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } } #else // TARGET_OSX - const fpsimd_context* fp = GetConstNativeSigSimdContext(native); + const fpsimd_context* fp = nullptr; + const sve_context* sve = nullptr; + GetConstNativeSigSimdContext(native, &fp, &sve); if (fp) { lpContext->Fpsr = fp->fpsr; @@ -927,6 +1043,25 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->V[i] = *(NEON128*) &fp->vregs[i]; } } + if (sve) + { + //TODO-SVE: This only handles vector lengths of 128bits. + + uint16_t vq = sve_vq_from_vl(sve->vl); + + lpContext->Vl = sve->vl; + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + + for (int i = 0; i < 32; i++) + { + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + lpContext->Z[i] = *(SVE128*) (((uint8_t*)sve) + SVE_SIG_ZREG_OFFSET(vq, i)); + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. 
+ lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + } + } #endif // TARGET_OSX #elif defined(HOST_ARM) const VfpSigFrame* fp = GetConstNativeSigSimdContext(native); diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index bcb8424b702464..fd38ee59f14ba5 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -121,7 +121,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); -#define SIZEOF__CONTEXT 0x390 +#define SIZEOF__CONTEXT 0x5e0 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); From 5e19a3e7e5adde66b73a26841d9cde4b5a6b6b58 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 21 Jun 2024 11:23:40 +0100 Subject: [PATCH 02/29] fix debug sizes --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 770cd170f0a0a2..73e496a0870cdd 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -71,9 +71,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0x930; - public const int OFFSETOF__REGDISPLAY__SP = 0x890; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898; + public const int SIZEOF__REGDISPLAY = 0xdd0; + public const int OFFSETOF__REGDISPLAY__SP = 0xd30; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0xd38; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x408; public const int OFFSETOF__REGDISPLAY__SP = 0x3e8; From 
b38dacd27e7c8ddc4138bd35f7fa5a0c41b159c2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 21 Jun 2024 11:39:08 +0100 Subject: [PATCH 03/29] Add SVE defines if missing from Linux host --- src/coreclr/pal/src/include/pal/context.h | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 7f3fe9113eddef..88893af4e5a0bb 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -60,6 +60,78 @@ using asm_sigcontext::_xstate; bool Xstate_IsAvx512Supported(); #endif // XSTATE_SUPPORTED || (HOST_AMD64 && HAVE_MACH_EXCEPTIONS) +#if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) +#if !defined(SVE_MAGIC) + +// Add the missing SVE defines + +#define SVE_MAGIC 0x53564501 + +struct sve_context { + struct _aarch64_ctx head; + __u16 vl; + __u16 flags; + __u16 __reserved[2]; +}; + +#define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + +#define sve_vq_from_vl(vl) ((vl) / __SVE_VQ_BYTES) +#define sve_vl_from_vq(vq) ((vq) * __SVE_VQ_BYTES) + +#define __SVE_ZREG_SIZE(vq) ((__u32)(vq) * __SVE_VQ_BYTES) +#define __SVE_PREG_SIZE(vq) ((__u32)(vq) * (__SVE_VQ_BYTES / 8)) +#define __SVE_FFR_SIZE(vq) __SVE_PREG_SIZE(vq) + +#define __SVE_ZREGS_OFFSET 0 +#define __SVE_ZREG_OFFSET(vq, n) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREG_SIZE(vq) * (n)) +#define __SVE_ZREGS_SIZE(vq) \ + (__SVE_ZREG_OFFSET(vq, __SVE_NUM_ZREGS) - __SVE_ZREGS_OFFSET) + +#define __SVE_PREGS_OFFSET(vq) \ + (__SVE_ZREGS_OFFSET + __SVE_ZREGS_SIZE(vq)) +#define __SVE_PREG_OFFSET(vq, n) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREG_SIZE(vq) * (n)) +#define __SVE_PREGS_SIZE(vq) \ + (__SVE_PREG_OFFSET(vq, __SVE_NUM_PREGS) - __SVE_PREGS_OFFSET(vq)) + +#define __SVE_FFR_OFFSET(vq) \ + (__SVE_PREGS_OFFSET(vq) + __SVE_PREGS_SIZE(vq)) + + +#define SVE_SIG_ZREG_SIZE(vq) __SVE_ZREG_SIZE(vq) +#define SVE_SIG_PREG_SIZE(vq) 
__SVE_PREG_SIZE(vq) +#define SVE_SIG_FFR_SIZE(vq) __SVE_FFR_SIZE(vq) + +#define SVE_SIG_REGS_OFFSET \ + ((sizeof(struct sve_context) + (__SVE_VQ_BYTES - 1)) \ + / __SVE_VQ_BYTES * __SVE_VQ_BYTES) + +#define SVE_SIG_ZREGS_OFFSET \ + (SVE_SIG_REGS_OFFSET + __SVE_ZREGS_OFFSET) +#define SVE_SIG_ZREG_OFFSET(vq, n) \ + (SVE_SIG_REGS_OFFSET + __SVE_ZREG_OFFSET(vq, n)) +#define SVE_SIG_ZREGS_SIZE(vq) __SVE_ZREGS_SIZE(vq) + +#define SVE_SIG_PREGS_OFFSET(vq) \ + (SVE_SIG_REGS_OFFSET + __SVE_PREGS_OFFSET(vq)) +#define SVE_SIG_PREG_OFFSET(vq, n) \ + (SVE_SIG_REGS_OFFSET + __SVE_PREG_OFFSET(vq, n)) +#define SVE_SIG_PREGS_SIZE(vq) __SVE_PREGS_SIZE(vq) + +#define SVE_SIG_FFR_OFFSET(vq) \ + (SVE_SIG_REGS_OFFSET + __SVE_FFR_OFFSET(vq)) + +#define SVE_SIG_REGS_SIZE(vq) \ + (__SVE_FFR_OFFSET(vq) + __SVE_FFR_SIZE(vq)) + +#define SVE_SIG_CONTEXT_SIZE(vq) \ + (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq)) + +#endif // SVE_MAGIC +#endif // HOST_64BIT && HOST_ARM64 && !TARGET_FREEBSD && !TARGET_OSX + #ifdef HOST_S390X #define MCREG_PSWMask(mc) ((mc).psw.mask) From 5fc68cccabbeb79e43419ac781ec05e0736d0e9d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 21 Jun 2024 12:19:56 +0100 Subject: [PATCH 04/29] More missing defines --- src/coreclr/pal/src/include/pal/context.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 88893af4e5a0bb..0ca3cb9cf5d0b2 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -76,6 +76,9 @@ struct sve_context { #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +#define __SVE_NUM_ZREGS 32 +#define __SVE_NUM_PREGS 16 + #define sve_vq_from_vl(vl) ((vl) / __SVE_VQ_BYTES) #define sve_vl_from_vq(vq) ((vq) * __SVE_VQ_BYTES) From 41580bc3ca38243faf397eff86c1d404057b5a5b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 21 Jun 2024 13:03:29 +0100 Subject: [PATCH 05/29] More missing defines --- 
src/coreclr/pal/src/include/pal/context.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 0ca3cb9cf5d0b2..f9ceaeebe2ec27 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -65,6 +65,15 @@ bool Xstate_IsAvx512Supported(); // Add the missing SVE defines +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + #define SVE_MAGIC 0x53564501 struct sve_context { From 6c8a283eafa982233a59e951bdc398a26733ab92 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 21 Jun 2024 14:17:44 +0100 Subject: [PATCH 06/29] Add cast --- src/coreclr/pal/src/include/pal/context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index f9ceaeebe2ec27..4058cb27188290 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -751,7 +751,7 @@ void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ inline void GetNativeSigSimdContext(native_context_t *mc, fpsimd_context **fp_ptr, sve_context **sve_ptr) { - _GetNativeSigSimdContext(&mc->uc_mcontext.__reserved[0], sizeof(mc->uc_mcontext.__reserved), fp_ptr, sve_ptr); + _GetNativeSigSimdContext((uint8_t *)&mc->uc_mcontext.__reserved[0], sizeof(mc->uc_mcontext.__reserved), fp_ptr, sve_ptr); } inline From 7d08124745cf374b5bd7012b90b0872757cc0a99 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 12:18:18 +0100 Subject: [PATCH 07/29] Move SVE registers after debug registers --- src/coreclr/debug/inc/dbgtargetcontext.h | 24 +++++++++++----------- src/coreclr/pal/inc/pal.h | 26 ++++++++++++------------ 2 files changed, 25 
insertions(+), 25 deletions(-) diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index 5e3f3f974d831b..68b43297294a28 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -465,22 +465,22 @@ typedef DECLSPEC_ALIGN(16) struct { /* +0x314 */ DWORD Fpsr; // - // Sve Registers + // Debug registers // - //TODO-SVE: How does this structure handle variable sized Z/P/FFR registers? - /* +0x318 */ DWORD Vl; - /* +0x32C */ DT_SVE128 Z[32]; - /* +0x? */ WORD P[32]; - /* +0x? */ WORD Ffr; + + /* +0x318 */ DWORD Bcr[DT_ARM64_MAX_BREAKPOINTS]; + /* +0x338 */ DWORD64 Bvr[DT_ARM64_MAX_BREAKPOINTS]; + /* +0x378 */ DWORD Wcr[DT_ARM64_MAX_WATCHPOINTS]; + /* +0x380 */ DWORD64 Wvr[DT_ARM64_MAX_WATCHPOINTS]; // - // Debug registers + // Sve Registers // - - /* +0x? */ DWORD Bcr[DT_ARM64_MAX_BREAKPOINTS]; - /* +0x? */ DWORD64 Bvr[DT_ARM64_MAX_BREAKPOINTS]; - /* +0x? */ DWORD Wcr[DT_ARM64_MAX_WATCHPOINTS]; - /* +0x? */ DWORD64 Wvr[DT_ARM64_MAX_WATCHPOINTS]; + //TODO-SVE: Support Vector register sizes >128bit + /* +0x390 */ DWORD Vl; + /* +0x3a0 */ SVE128 Z[32]; + /* +0x5a0 */ DWORD P[16]; + /* +0x5e0 */ DWORD Ffr; /* +0x5e0 */ } DT_CONTEXT; diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 1d43cb63471704..ab27c936dd0b4b 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1942,23 +1942,23 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* +0x314 */ DWORD Fpsr; // - // Sve Registers + // Debug registers // - //TODO-SVE: How does this structure handle variable sized Z/P/FFR registers? - /* +0x318 */ DWORD Vl; - /* +0x32C */ SVE128 Z[32]; - /* +0x32C */ WORD P[32]; - /* +0x32C */ WORD Ffr; + + /* +0x318 */ DWORD Bcr[ARM64_MAX_BREAKPOINTS]; + /* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; + /* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; + /* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; // - // Debug registers + // Sve Registers // - - /* +0x? 
*/ DWORD Bcr[ARM64_MAX_BREAKPOINTS]; - /* +0x? */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS]; - /* +0x? */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; - /* +0x? */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; - /* +0x? */ + //TODO-SVE: Support Vector register sizes >128bit + /* +0x390 */ DWORD Vl; + /* +0x3a0 */ SVE128 Z[32]; + /* +0x5a0 */ DWORD P[16]; + /* +0x5e0 */ DWORD Ffr; + /* +0x5e0 */ } CONTEXT, *PCONTEXT, *LPCONTEXT; From 5f918a9f638a831288805802d33f4b0798f8500a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 13:53:29 +0100 Subject: [PATCH 08/29] Fix dbgtargetcontext --- src/coreclr/debug/inc/dbgtargetcontext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index 68b43297294a28..b7ae53962aeb85 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -478,7 +478,7 @@ typedef DECLSPEC_ALIGN(16) struct { // //TODO-SVE: Support Vector register sizes >128bit /* +0x390 */ DWORD Vl; - /* +0x3a0 */ SVE128 Z[32]; + /* +0x3a0 */ DT_SVE128 Z[32]; /* +0x5a0 */ DWORD P[16]; /* +0x5e0 */ DWORD Ffr; /* +0x5e0 */ From 7cff0c1e8d057d5a4cd205020e0e8d911fd1823d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 14:35:22 +0100 Subject: [PATCH 09/29] Remove SVE from debug context --- src/coreclr/debug/inc/dbgtargetcontext.h | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index b7ae53962aeb85..0659b0a851ebba 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -399,12 +399,6 @@ typedef struct { LONGLONG High; } DT_NEON128; -typedef struct { - ULONGLONG Low; - LONGLONG High; -} DT_SVE128; - - typedef DECLSPEC_ALIGN(16) struct { // // Control flags. 
@@ -472,20 +466,16 @@ typedef DECLSPEC_ALIGN(16) struct { /* +0x338 */ DWORD64 Bvr[DT_ARM64_MAX_BREAKPOINTS]; /* +0x378 */ DWORD Wcr[DT_ARM64_MAX_WATCHPOINTS]; /* +0x380 */ DWORD64 Wvr[DT_ARM64_MAX_WATCHPOINTS]; - - // - // Sve Registers - // - //TODO-SVE: Support Vector register sizes >128bit - /* +0x390 */ DWORD Vl; - /* +0x3a0 */ DT_SVE128 Z[32]; - /* +0x5a0 */ DWORD P[16]; - /* +0x5e0 */ DWORD Ffr; - /* +0x5e0 */ + /* +0x390 */ } DT_CONTEXT; + +#if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS) +static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, Vl), "DT_CONTEXT must not include the SVE registers on AMD64"); +#else static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64"); +#endif #elif defined(DTCONTEXT_IS_LOONGARCH64) From 3f287c5710c8a2a1e70c4f15180fe838e8f1abe7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 15:05:07 +0100 Subject: [PATCH 10/29] Move ffr --- src/coreclr/pal/inc/pal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index ab27c936dd0b4b..9a0ba4e0553dfb 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1953,11 +1953,12 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // // Sve Registers // - //TODO-SVE: Support Vector register sizes >128bit + // TODO-SVE: Support Vector register sizes >128bit /* +0x390 */ DWORD Vl; + /* +0x394 */ DWORD Ffr; + /* +0x398 */ DWORD64 __pad; // Ensure 128bit alignment /* +0x3a0 */ SVE128 Z[32]; /* +0x5a0 */ DWORD P[16]; - /* +0x5e0 */ DWORD Ffr; /* +0x5e0 */ } CONTEXT, *PCONTEXT, *LPCONTEXT; From e9e6a4e17e482cdd208bab293a3393468c0a6579 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 16:43:02 +0100 Subject: [PATCH 11/29] Add SVE registers to asmconstants --- src/coreclr/pal/src/arch/arm64/asmconstants.h | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git 
a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index a657b8e5eb1c5e..acbfd81d717418 100644 --- a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -10,6 +10,7 @@ #define CONTEXT_INTEGER_BIT (1) #define CONTEXT_FLOATING_POINT_BIT (2) #define CONTEXT_DEBUG_REGISTERS_BIT (3) +#define CONTEXT_SVE_BIT (4) #define CONTEXT_CONTROL (CONTEXT_ARM64 | (1L << CONTEXT_CONTROL_BIT)) #define CONTEXT_INTEGER (CONTEXT_ARM64 | (1 << CONTEXT_INTEGER_BIT)) @@ -90,6 +91,65 @@ #define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16 #define CONTEXT_Fpcr 0 #define CONTEXT_Fpsr CONTEXT_Fpcr+8 -#define CONTEXT_Size ((CONTEXT_NEON_OFFSET + CONTEXT_Fpsr + 8 + 0xf) & ~0xf) + +// TODO-SVE: Support Vector register sizes >128bit + +#define CONTEXT_SVE_OFFSET CONTEXT_FLOAT_CONTROL_OFFSET+16 // Align to 128bits +#define CONTEXT_VL 0 +#define CONTEXT_FFR CONTEXT_Z0+8 + +#define CONTEXT_Z_OFFSET CONTEXT_SVE_OFFSET+16 // Align to 128bits +#define CONTEXT_Z0 0 +#define CONTEXT_Z1 CONTEXT_Z0+16 +#define CONTEXT_Z2 CONTEXT_Z1+16 +#define CONTEXT_Z3 CONTEXT_Z2+16 +#define CONTEXT_Z4 CONTEXT_Z3+16 +#define CONTEXT_Z5 CONTEXT_Z4+16 +#define CONTEXT_Z6 CONTEXT_Z5+16 +#define CONTEXT_Z7 CONTEXT_Z6+16 +#define CONTEXT_Z8 CONTEXT_Z7+16 +#define CONTEXT_Z9 CONTEXT_Z8+16 +#define CONTEXT_Z10 CONTEXT_Z9+16 +#define CONTEXT_Z11 CONTEXT_Z10+16 +#define CONTEXT_Z12 CONTEXT_Z11+16 +#define CONTEXT_Z13 CONTEXT_Z12+16 +#define CONTEXT_Z14 CONTEXT_Z13+16 +#define CONTEXT_Z15 CONTEXT_Z14+16 +#define CONTEXT_Z16 CONTEXT_Z15+16 +#define CONTEXT_Z17 CONTEXT_Z16+16 +#define CONTEXT_Z18 CONTEXT_Z17+16 +#define CONTEXT_Z19 CONTEXT_Z18+16 +#define CONTEXT_Z20 CONTEXT_Z19+16 +#define CONTEXT_Z21 CONTEXT_Z20+16 +#define CONTEXT_Z22 CONTEXT_Z21+16 +#define CONTEXT_Z23 CONTEXT_Z22+16 +#define CONTEXT_Z24 CONTEXT_Z23+16 +#define CONTEXT_Z25 CONTEXT_Z24+16 +#define CONTEXT_Z26 CONTEXT_Z25+16 +#define CONTEXT_Z27 CONTEXT_Z26+16 +#define 
CONTEXT_Z28 CONTEXT_Z27+16 +#define CONTEXT_Z29 CONTEXT_Z28+16 +#define CONTEXT_Z30 CONTEXT_Z29+16 +#define CONTEXT_Z31 CONTEXT_Z30+16 + +#define CONTEXT_P_OFFSET CONTEXT_Z_OFFSET + CONTEXT_Z31 + 16 +#define CONTEXT_P0 0 +#define CONTEXT_P1 CONTEXT_P0+4 +#define CONTEXT_P2 CONTEXT_P1+4 +#define CONTEXT_P3 CONTEXT_P2+4 +#define CONTEXT_P4 CONTEXT_P3+4 +#define CONTEXT_P5 CONTEXT_P4+4 +#define CONTEXT_P6 CONTEXT_P5+4 +#define CONTEXT_P7 CONTEXT_P6+4 +#define CONTEXT_P8 CONTEXT_P7+4 +#define CONTEXT_P9 CONTEXT_P8+4 +#define CONTEXT_P10 CONTEXT_P9+4 +#define CONTEXT_P11 CONTEXT_P10+4 +#define CONTEXT_P12 CONTEXT_P11+4 +#define CONTEXT_P13 CONTEXT_P12+4 +#define CONTEXT_P14 CONTEXT_P13+4 +#define CONTEXT_P15 CONTEXT_P14+4 + +#define CONTEXT_Size (CONTEXT_P_OFFSET + CONTEXT_P15 + 4) #endif From a2c17dd9439cf81772ed53017aa965fdb627bebc Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 24 Jun 2024 19:01:42 +0100 Subject: [PATCH 12/29] Remove Z registers from context --- .../System/Runtime/ExceptionServices/AsmOffsets.cs | 14 +++++++------- src/coreclr/pal/inc/pal.h | 6 +++--- src/coreclr/pal/src/thread/context.cpp | 4 ++-- src/coreclr/vm/arm64/asmconstants.h | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 73e496a0870cdd..d81e428fc95e46 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -25,9 +25,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0xde0; - public const int OFFSETOF__REGDISPLAY__SP = 0xd38; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0xd40; + public const int SIZEOF__REGDISPLAY = 0x9e0; + public 
const int OFFSETOF__REGDISPLAY__SP = 0x938; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; public const int OFFSETOF__REGDISPLAY__SP = 0x3ec; @@ -71,9 +71,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0xdd0; - public const int OFFSETOF__REGDISPLAY__SP = 0xd30; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0xd38; + public const int SIZEOF__REGDISPLAY = 0x9d0; + public const int OFFSETOF__REGDISPLAY__SP = 0x930; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x408; public const int OFFSETOF__REGDISPLAY__SP = 0x3e8; @@ -113,7 +113,7 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIx #elif TARGET_ARM64 - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x5e0; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0; #elif TARGET_ARM public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0; #elif TARGET_X86 diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 9a0ba4e0553dfb..0162c1e9be1e30 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1954,12 +1954,12 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // Sve Registers // // TODO-SVE: Support Vector register sizes >128bit + // For 128bit, Z and V registers fully overlap, so there is no need to load/store both. 
/* +0x390 */ DWORD Vl; /* +0x394 */ DWORD Ffr; /* +0x398 */ DWORD64 __pad; // Ensure 128bit alignment - /* +0x3a0 */ SVE128 Z[32]; - /* +0x5a0 */ DWORD P[16]; - /* +0x5e0 */ + /* +0x3a0 */ DWORD P[16]; + /* +0x3e0 */ } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 7056eb049ac978..baf92ad5801b7e 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -733,8 +733,8 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) for (int i = 0; i < 32; i++) { + //TODO-SVE: Copy SVE registers once they are >128bits //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - *(SVE128*) (((uint8_t*)sve) + SVE_SIG_ZREG_OFFSET(vq, i)) = lpContext->Z[i]; //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; } @@ -1056,8 +1056,8 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex for (int i = 0; i < 32; i++) { + //TODO-SVE: Copy SVE registers once they are >128bits //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - lpContext->Z[i] = *(SVE128*) (((uint8_t*)sve) + SVE_SIG_ZREG_OFFSET(vq, i)); //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. 
lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); } diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index fd38ee59f14ba5..51aa7b0e5a42f0 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -121,7 +121,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); -#define SIZEOF__CONTEXT 0x5e0 +#define SIZEOF__CONTEXT 0x3e0 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); From 7c3256b2c9d71e17d56b0e4d280568c05363d415 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 07:47:41 +0100 Subject: [PATCH 13/29] backup/restore SVE in Context2.S Change-Id: I2f6bc39068d9fed3f45b548089b144884607d97b --- src/coreclr/jit/instrsarm64sve.h | 2 +- src/coreclr/pal/src/arch/arm64/asmconstants.h | 77 ++++++------------- src/coreclr/pal/src/arch/arm64/context2.S | 60 ++++++++++++++- 3 files changed, 82 insertions(+), 57 deletions(-) diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index e29899d981b6d4..2ddc1e4f379495 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -280,7 +280,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // enum name info SVE_HY_3A SVE_HY_3A_A SVE_HY_3B SVE_HZ_2A_B SVE_IA_2A SVE_IB_3A -INST6(prfb, "prfb", 0, IF_SVE_6E, 0x84200000, 0xC4200000, 0xC4608000, 0x8400E000, 0x85C00000, 0x8400C000 ) +INST6(prfb, "prfb", 0, IF_SVE_6E, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff ) // PRFB , , [, .S, ] SVE_HY_3A 100001000h1mmmmm 000gggnnnnn0oooo 8420 0000 // PRFB , , [, .D, ] SVE_HY_3A_A 110001000h1mmmmm 000gggnnnnn0oooo C420 0000 // PRFB , , [, .D] SVE_HY_3B 11000100011mmmmm 100gggnnnnn0oooo C460 8000 diff --git a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index acbfd81d717418..48dcee5906fea6 100644 --- 
a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -55,6 +55,7 @@ #define CONTEXT_Lr CONTEXT_Fp+8 #define CONTEXT_Sp CONTEXT_Lr+8 #define CONTEXT_Pc CONTEXT_Sp+8 + #define CONTEXT_NEON_OFFSET CONTEXT_Pc+8 #define CONTEXT_V0 0 #define CONTEXT_V1 CONTEXT_V0+16 @@ -93,63 +94,29 @@ #define CONTEXT_Fpsr CONTEXT_Fpcr+8 // TODO-SVE: Support Vector register sizes >128bit +// SVE register offsets are multiples of the vector length +// For 128bit, Z and V registers fully overlap, so there is no need to load/store both. -#define CONTEXT_SVE_OFFSET CONTEXT_FLOAT_CONTROL_OFFSET+16 // Align to 128bits -#define CONTEXT_VL 0 -#define CONTEXT_FFR CONTEXT_Z0+8 - -#define CONTEXT_Z_OFFSET CONTEXT_SVE_OFFSET+16 // Align to 128bits -#define CONTEXT_Z0 0 -#define CONTEXT_Z1 CONTEXT_Z0+16 -#define CONTEXT_Z2 CONTEXT_Z1+16 -#define CONTEXT_Z3 CONTEXT_Z2+16 -#define CONTEXT_Z4 CONTEXT_Z3+16 -#define CONTEXT_Z5 CONTEXT_Z4+16 -#define CONTEXT_Z6 CONTEXT_Z5+16 -#define CONTEXT_Z7 CONTEXT_Z6+16 -#define CONTEXT_Z8 CONTEXT_Z7+16 -#define CONTEXT_Z9 CONTEXT_Z8+16 -#define CONTEXT_Z10 CONTEXT_Z9+16 -#define CONTEXT_Z11 CONTEXT_Z10+16 -#define CONTEXT_Z12 CONTEXT_Z11+16 -#define CONTEXT_Z13 CONTEXT_Z12+16 -#define CONTEXT_Z14 CONTEXT_Z13+16 -#define CONTEXT_Z15 CONTEXT_Z14+16 -#define CONTEXT_Z16 CONTEXT_Z15+16 -#define CONTEXT_Z17 CONTEXT_Z16+16 -#define CONTEXT_Z18 CONTEXT_Z17+16 -#define CONTEXT_Z19 CONTEXT_Z18+16 -#define CONTEXT_Z20 CONTEXT_Z19+16 -#define CONTEXT_Z21 CONTEXT_Z20+16 -#define CONTEXT_Z22 CONTEXT_Z21+16 -#define CONTEXT_Z23 CONTEXT_Z22+16 -#define CONTEXT_Z24 CONTEXT_Z23+16 -#define CONTEXT_Z25 CONTEXT_Z24+16 -#define CONTEXT_Z26 CONTEXT_Z25+16 -#define CONTEXT_Z27 CONTEXT_Z26+16 -#define CONTEXT_Z28 CONTEXT_Z27+16 -#define CONTEXT_Z29 CONTEXT_Z28+16 -#define CONTEXT_Z30 CONTEXT_Z29+16 -#define CONTEXT_Z31 CONTEXT_Z30+16 +#define CONTEXT_SVE_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_Fpsr+8 +#define CONTEXT_FFR_VL 0 -#define 
CONTEXT_P_OFFSET CONTEXT_Z_OFFSET + CONTEXT_Z31 + 16 -#define CONTEXT_P0 0 -#define CONTEXT_P1 CONTEXT_P0+4 -#define CONTEXT_P2 CONTEXT_P1+4 -#define CONTEXT_P3 CONTEXT_P2+4 -#define CONTEXT_P4 CONTEXT_P3+4 -#define CONTEXT_P5 CONTEXT_P4+4 -#define CONTEXT_P6 CONTEXT_P5+4 -#define CONTEXT_P7 CONTEXT_P6+4 -#define CONTEXT_P8 CONTEXT_P7+4 -#define CONTEXT_P9 CONTEXT_P8+4 -#define CONTEXT_P10 CONTEXT_P9+4 -#define CONTEXT_P11 CONTEXT_P10+4 -#define CONTEXT_P12 CONTEXT_P11+4 -#define CONTEXT_P13 CONTEXT_P12+4 -#define CONTEXT_P14 CONTEXT_P13+4 -#define CONTEXT_P15 CONTEXT_P14+4 +#define CONTEXT_P0_VL CONTEXT_FFR_VL+1 +#define CONTEXT_P1_VL CONTEXT_P0_VL+1 +#define CONTEXT_P2_VL CONTEXT_P1_VL+1 +#define CONTEXT_P3_VL CONTEXT_P2_VL+1 +#define CONTEXT_P4_VL CONTEXT_P3_VL+1 +#define CONTEXT_P5_VL CONTEXT_P4_VL+1 +#define CONTEXT_P6_VL CONTEXT_P5_VL+1 +#define CONTEXT_P7_VL CONTEXT_P6_VL+1 +#define CONTEXT_P8_VL CONTEXT_P7_VL+1 +#define CONTEXT_P9_VL CONTEXT_P8_VL+1 +#define CONTEXT_P10_VL CONTEXT_P9_VL+1 +#define CONTEXT_P11_VL CONTEXT_P10_VL+1 +#define CONTEXT_P12_VL CONTEXT_P11_VL+1 +#define CONTEXT_P13_VL CONTEXT_P12_VL+1 +#define CONTEXT_P14_VL CONTEXT_P13_VL+1 +#define CONTEXT_P15_VL CONTEXT_P14_VL+1 -#define CONTEXT_Size (CONTEXT_P_OFFSET + CONTEXT_P15 + 4) +#define CONTEXT_Size CONTEXT_SVE_OFFSET + ((CONTEXT_P15_VL+1) * 4) #endif diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index 23bc0c065581e0..12af7e390a3a34 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // -// Implementation of _CONTEXT_CaptureContext for the ARM platform. +// Implementation of _CONTEXT_CaptureContext for the ARM64 platform. // This function is processor dependent. 
It is used by exception handling, // and is always apply to the current thread. // @@ -12,6 +12,7 @@ // Incoming: // x0: Context* // +.arch_extension sve LEAF_ENTRY CONTEXT_CaptureContext, _TEXT PROLOG_STACK_ALLOC 32 .cfi_adjust_cfa_offset 32 @@ -104,6 +105,38 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER): sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x8 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_SVE) + + add x0, x0, CONTEXT_SVE_OFFSET + str p0, [x0, CONTEXT_P0_VL, MUL VL] + str p1, [x0, CONTEXT_P1_VL, MUL VL] + str p2, [x0, CONTEXT_P2_VL, MUL VL] + str p3, [x0, CONTEXT_P3_VL, MUL VL] + str p4, [x0, CONTEXT_P4_VL, MUL VL] + str p5, [x0, CONTEXT_P5_VL, MUL VL] + str p6, [x0, CONTEXT_P6_VL, MUL VL] + str p7, [x0, CONTEXT_P7_VL, MUL VL] + str p8, [x0, CONTEXT_P8_VL, MUL VL] + str p9, [x0, CONTEXT_P9_VL, MUL VL] + str p10, [x0, CONTEXT_P10_VL, MUL VL] + str p11, [x0, CONTEXT_P11_VL, MUL VL] + str p12, [x0, CONTEXT_P12_VL, MUL VL] + str p13, [x0, CONTEXT_P13_VL, MUL VL] + str p14, [x0, CONTEXT_P14_VL, MUL VL] + str p15, [x0, CONTEXT_P15_VL, MUL VL] + rdffr p0.b + str p0, [x0, CONTEXT_FFR_VL, MUL VL] + ldr p0, [x0, 0, MUL VL] + sub x0, x0, CONTEXT_SVE_OFFSET + +LOCAL_LABEL(Done_CONTEXT_SVE): EPILOG_STACK_FREE 32 ret @@ -133,6 +166,7 @@ LEAF_END RtlCaptureContext, _TEXT // x0: Context* // x1: Exception* // +.arch_extension sve LEAF_ENTRY RtlRestoreContext, _TEXT #ifdef HAS_ADDRESS_SANITIZER @@ -155,6 +189,30 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): mov x16, x0 ldr w17, [x16, CONTEXT_ContextFlags] + tbz w17, #CONTEXT_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + + add x16, x16, CONTEXT_SVE_OFFSET + ldr p0, [x16, CONTEXT_FFR_VL, MUL VL] + wrffr p0.b + ldr p0, [x16, CONTEXT_P0_VL, MUL VL] + ldr p1, [x16, CONTEXT_P1_VL, MUL VL] + ldr p2, [x16, 
CONTEXT_P2_VL, MUL VL] + ldr p3, [x16, CONTEXT_P3_VL, MUL VL] + ldr p4, [x16, CONTEXT_P4_VL, MUL VL] + ldr p5, [x16, CONTEXT_P5_VL, MUL VL] + ldr p6, [x16, CONTEXT_P6_VL, MUL VL] + ldr p7, [x16, CONTEXT_P7_VL, MUL VL] + ldr p8, [x16, CONTEXT_P8_VL, MUL VL] + ldr p9, [x16, CONTEXT_P9_VL, MUL VL] + ldr p10, [x16, CONTEXT_P10_VL, MUL VL] + ldr p11, [x16, CONTEXT_P11_VL, MUL VL] + ldr p12, [x16, CONTEXT_P12_VL, MUL VL] + ldr p13, [x16, CONTEXT_P13_VL, MUL VL] + ldr p14, [x16, CONTEXT_P14_VL, MUL VL] + ldr p15, [x16, CONTEXT_P15_VL, MUL VL] + sub x16, x16, CONTEXT_SVE_OFFSET + +LOCAL_LABEL(No_Restore_CONTEXT_SVE): tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT) add x16, x16, CONTEXT_NEON_OFFSET From ba17c2b4c1096f35653ca052197868cb38e90038 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 10:04:30 +0100 Subject: [PATCH 14/29] Remove unused SVE128 struct --- src/coreclr/pal/inc/pal.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 0162c1e9be1e30..a4e2b8fa34aa90 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1876,11 +1876,6 @@ typedef struct _NEON128 { LONGLONG High; } NEON128, *PNEON128; -typedef struct _SVE128 { - ULONGLONG Low; - LONGLONG High; -} SVE128, *PSVE128; - typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // From 73404ffa2dc127b366679c7c5f4d0db8dd7ddb20 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 12:25:45 +0100 Subject: [PATCH 15/29] Add XStateFeaturesMask --- .../Runtime/ExceptionServices/AsmOffsets.cs | 14 +++++++------- src/coreclr/debug/inc/dbgtargetcontext.h | 2 +- src/coreclr/pal/inc/pal.h | 6 ++++++ src/coreclr/pal/src/arch/arm64/asmconstants.h | 9 +++++++-- src/coreclr/pal/src/arch/arm64/context2.S | 16 ++++++---------- src/coreclr/pal/src/thread/context.cpp | 10 ++++++++-- src/coreclr/vm/arm64/asmconstants.h | 2 +- 7 files changed, 36 insertions(+), 23 deletions(-) diff --git 
a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index d81e428fc95e46..8cb2659efadecc 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -25,9 +25,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0x9e0; - public const int OFFSETOF__REGDISPLAY__SP = 0x938; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; + public const int SIZEOF__REGDISPLAY = 0xa00; + public const int OFFSETOF__REGDISPLAY__SP = 0x958; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x960; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; public const int OFFSETOF__REGDISPLAY__SP = 0x3ec; @@ -71,9 +71,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8; #endif // TARGET_UNIX #elif TARGET_ARM64 - public const int SIZEOF__REGDISPLAY = 0x9d0; - public const int OFFSETOF__REGDISPLAY__SP = 0x930; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938; + public const int SIZEOF__REGDISPLAY = 0x9f0; + public const int OFFSETOF__REGDISPLAY__SP = 0x950; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x958; #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x408; public const int OFFSETOF__REGDISPLAY__SP = 0x3e8; @@ -113,7 +113,7 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; #endif // TARGET_UNIx #elif TARGET_ARM64 - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3f0; #elif TARGET_ARM public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0; #elif TARGET_X86 diff --git a/src/coreclr/debug/inc/dbgtargetcontext.h b/src/coreclr/debug/inc/dbgtargetcontext.h index 
0659b0a851ebba..dab7ca29c7db33 100644 --- a/src/coreclr/debug/inc/dbgtargetcontext.h +++ b/src/coreclr/debug/inc/dbgtargetcontext.h @@ -472,7 +472,7 @@ typedef DECLSPEC_ALIGN(16) struct { #if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS) -static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, Vl), "DT_CONTEXT must not include the SVE registers on AMD64"); +static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, XStateFeaturesMask), "DT_CONTEXT must not include the SVE registers on AMD64"); #else static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64"); #endif diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index a4e2b8fa34aa90..8885cde07e896f 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1844,6 +1844,10 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY { #define CONTEXT_EXCEPTION_REQUEST 0x40000000L #define CONTEXT_EXCEPTION_REPORTING 0x80000000L +#define XSTATE_SVE (0) + +#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE)) + // // This flag is set by the unwinder if it has unwound to a call // site, and cleared whenever it unwinds through a trap frame. 
@@ -1945,6 +1949,8 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { /* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS]; /* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS]; + /* +0x390 */ DWORD64 XStateFeaturesMask; + // // Sve Registers // diff --git a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index 48dcee5906fea6..59ef9d879f66ca 100644 --- a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -10,7 +10,6 @@ #define CONTEXT_INTEGER_BIT (1) #define CONTEXT_FLOATING_POINT_BIT (2) #define CONTEXT_DEBUG_REGISTERS_BIT (3) -#define CONTEXT_SVE_BIT (4) #define CONTEXT_CONTROL (CONTEXT_ARM64 | (1L << CONTEXT_CONTROL_BIT)) #define CONTEXT_INTEGER (CONTEXT_ARM64 | (1 << CONTEXT_INTEGER_BIT)) @@ -19,6 +18,10 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) +#define XSTATE_SVE (0) + +#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE)) + #define CONTEXT_ContextFlags 0 #define CONTEXT_Cpsr CONTEXT_ContextFlags+4 @@ -93,11 +96,13 @@ #define CONTEXT_Fpcr 0 #define CONTEXT_Fpsr CONTEXT_Fpcr+8 +#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+8 + // TODO-SVE: Support Vector register sizes >128bit // SVE register offsets are multiples of the vector length // For 128bit, Z and V registers fully overlap, so there is no need to load/store both. 
-#define CONTEXT_SVE_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_Fpsr+8 +#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8 #define CONTEXT_FFR_VL 0 #define CONTEXT_P0_VL CONTEXT_FFR_VL+1 diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index 12af7e390a3a34..5b54f110455c2e 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -105,14 +105,8 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER): sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): - ldr w1, [x0, CONTEXT_ContextFlags] - // clangs assembler doesn't seem to support the mov Wx, imm32 yet - movz w2, #0x40, lsl #16 - movk w2, #0x8 - mov w3, w2 - and w2, w1, w2 - cmp w2, w3 - b.ne LOCAL_LABEL(Done_CONTEXT_SVE) + ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] + tbz x1, #XSTATE_SVE, LOCAL_LABEL(Done_CONTEXT_SVE) add x0, x0, CONTEXT_SVE_OFFSET str p0, [x0, CONTEXT_P0_VL, MUL VL] @@ -157,6 +151,8 @@ LEAF_ENTRY RtlCaptureContext, _TEXT orr w1, w1, #0x4 orr w1, w1, #0x8 str w1, [x0, CONTEXT_ContextFlags] + movz x1, 0 + str x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] ldr x1, [sp] EPILOG_STACK_FREE 16 b C_FUNC(CONTEXT_CaptureContext) @@ -188,8 +184,8 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): // since we potentially clobber x0 below, we'll bank it in x16 mov x16, x0 - ldr w17, [x16, CONTEXT_ContextFlags] - tbz w17, #CONTEXT_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET] + tbz w17, #XSTATE_SVE, LOCAL_LABEL(No_Restore_CONTEXT_SVE) add x16, x16, CONTEXT_SVE_OFFSET ldr p0, [x16, CONTEXT_FFR_VL, MUL VL] diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index baf92ad5801b7e..bf9bd6bfd20040 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -724,9 +724,12 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) { 
//TODO-SVE: This only handles vector lengths of 128bits. + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); + uint16_t vq = sve_vq_from_vl(lpContext->Vl); - sve->vl = lpContext->Vl; + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; @@ -1047,9 +1050,12 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex { //TODO-SVE: This only handles vector lengths of 128bits. + lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; + uint16_t vq = sve_vq_from_vl(sve->vl); - lpContext->Vl = sve->vl; + _ASSERTE(sve->vl > 0); + lpContext->Vl = sve->vl; //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 51aa7b0e5a42f0..f44a121182f796 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -121,7 +121,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); -#define SIZEOF__CONTEXT 0x3e0 +#define SIZEOF__CONTEXT 0x3f0 ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); From ea6979a247f6de9dfd2a588afdc8fc956d1a2239 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 14:37:37 +0100 Subject: [PATCH 16/29] restore instrsarm64sve.h changes --- src/coreclr/jit/instrsarm64sve.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 2ddc1e4f379495..e29899d981b6d4 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -280,7 +280,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // enum name info SVE_HY_3A SVE_HY_3A_A SVE_HY_3B SVE_HZ_2A_B SVE_IA_2A SVE_IB_3A 
-INST6(prfb, "prfb", 0, IF_SVE_6E, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff ) +INST6(prfb, "prfb", 0, IF_SVE_6E, 0x84200000, 0xC4200000, 0xC4608000, 0x8400E000, 0x85C00000, 0x8400C000 ) // PRFB , , [, .S, ] SVE_HY_3A 100001000h1mmmmm 000gggnnnnn0oooo 8420 0000 // PRFB , , [, .D, ] SVE_HY_3A_A 110001000h1mmmmm 000gggnnnnn0oooo C420 0000 // PRFB , , [, .D] SVE_HY_3B 11000100011mmmmm 100gggnnnnn0oooo C460 8000 From dd12f0369385a15221b145e92407c99dace343e5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 16:32:08 +0100 Subject: [PATCH 17/29] Restore SIZEOF__CONTEXT for windows --- src/coreclr/vm/arm64/asmconstants.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index f44a121182f796..3cafbfb719f380 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -121,7 +121,11 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); #define SIZEOF__Frame 0x10 ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); +#if !defined(HOST_WINDOWS) #define SIZEOF__CONTEXT 0x3f0 +#else +#define SIZEOF__CONTEXT 0x390 +#endif ASMCONSTANTS_C_ASSERT(SIZEOF__CONTEXT == sizeof(T_CONTEXT)); From 29acc33e863ebb0806f00173f6c614d7d6dfbbc1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 17:21:52 +0100 Subject: [PATCH 18/29] Fix AsmOffsets.cs for windows --- .../System/Runtime/ExceptionServices/AsmOffsets.cs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 8cb2659efadecc..37ab3584d5d3ad 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -25,9 +25,15 @@ class 
AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0xa00; public const int OFFSETOF__REGDISPLAY__SP = 0x958; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x960; +#else // TARGET_UNIX + public const int SIZEOF__REGDISPLAY = 0x930; + public const int OFFSETOF__REGDISPLAY__SP = 0x890; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; public const int OFFSETOF__REGDISPLAY__SP = 0x3ec; @@ -111,9 +117,13 @@ class AsmOffsets public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0; -#endif // TARGET_UNIx +#endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3f0; +#else // TARGET_UNIX + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0; #elif TARGET_X86 From a21aee0d2e3797d0f6a85a1d26b55bb611789cc8 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 25 Jun 2024 19:33:38 +0100 Subject: [PATCH 19/29] Fix AsmOffsets.cs for windows --- .../System/Runtime/ExceptionServices/AsmOffsets.cs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 37ab3584d5d3ad..88bfa97bcfda57 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -30,9 +30,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__SP = 0x958; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x960; #else // TARGET_UNIX - public const int 
SIZEOF__REGDISPLAY = 0x930; - public const int OFFSETOF__REGDISPLAY__SP = 0x890; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898; + public const int SIZEOF__REGDISPLAY = 0x940; + public const int OFFSETOF__REGDISPLAY__SP = 0x898; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; #endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; @@ -77,9 +77,15 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8; #endif // TARGET_UNIX #elif TARGET_ARM64 +#if TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0x9f0; public const int OFFSETOF__REGDISPLAY__SP = 0x950; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x958; +#else // TARGET_UNIX + public const int SIZEOF__REGDISPLAY = 0x930; + public const int OFFSETOF__REGDISPLAY__SP = 0x890; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898; +#endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x408; public const int OFFSETOF__REGDISPLAY__SP = 0x3e8; From 2e549cd0036bb0f73d72a51b3424f8fc8bed39a7 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 26 Jun 2024 15:27:40 +0100 Subject: [PATCH 20/29] Restore missing ldr --- src/coreclr/pal/src/arch/arm64/context2.S | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index 5b54f110455c2e..d0734fec875350 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -209,6 +209,7 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): sub x16, x16, CONTEXT_SVE_OFFSET LOCAL_LABEL(No_Restore_CONTEXT_SVE): + ldr w17, [x16, CONTEXT_ContextFlags] tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT) add x16, x16, CONTEXT_NEON_OFFSET From d43f5d58df337c65249f007086ea98e02e5fde97 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 27 Jun 2024 08:13:37 +0100 Subject: [PATCH 21/29] Check size of SVE data returned from the kernel --- 
src/coreclr/pal/src/thread/context.cpp | 42 +++++++++++++++++--------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index bf9bd6bfd20040..4602381114e1aa 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -720,26 +720,38 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) *(NEON128*) &fp->vregs[i] = lpContext->V[i]; } } + if (sve) { - //TODO-SVE: This only handles vector lengths of 128bits. + // Sve context may be present when SVE registers are not live. If so then + // there is no SVE data to restore. + if (sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + //TODO-SVE: This only handles vector lengths of 128bits. - _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); - uint16_t vq = sve_vq_from_vl(lpContext->Vl); + uint16_t vq = sve_vq_from_vl(lpContext->Vl); - // Vector length should not have changed. - _ASSERTE(lpContext->Vl == sve->vl); + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; - for (int i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) + { + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + } + } + else { - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. 
- //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + // If this happens that somehow lpContext is filled with SVE state, but the kernel + // thinks SVE is not live yet. + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == 0); } } #endif // TARGET_OSX @@ -1046,7 +1058,9 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->V[i] = *(NEON128*) &fp->vregs[i]; } } - if (sve) + // Sve context may be present when SVE registers are not live. If so then + // there is no SVE data to save. + if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) { //TODO-SVE: This only handles vector lengths of 128bits. @@ -1054,7 +1068,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex uint16_t vq = sve_vq_from_vl(sve->vl); - _ASSERTE(sve->vl > 0); + _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); lpContext->Vl = sve->vl; //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. From ed15cc321e2c1117d69373dff96a47305d6ac827 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 27 Jun 2024 08:30:42 +0100 Subject: [PATCH 22/29] 16 P registers --- src/coreclr/pal/src/thread/context.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 4602381114e1aa..80816e69bf3fad 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -739,10 +739,11 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; - for (int i = 0; i < 32; i++) + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. 
+ + for (int i = 0; i < 16; i++) { - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; } @@ -1074,10 +1075,11 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); - for (int i = 0; i < 32; i++) + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) { - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); } From 063f41bcec442b15c8810797dbdb0277d82691f8 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 07:40:28 +0100 Subject: [PATCH 23/29] Copy context based on XSTATE_MASK_SVE --- src/coreclr/pal/src/thread/context.cpp | 80 ++++++++++++++------------ 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 80816e69bf3fad..53051b80d3ce62 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -721,38 +721,28 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } } - if (sve) + if (sve && (lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE) { - // Sve context may be present when SVE registers are not live. If so then - // there is no SVE data to restore. - if (sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) - { - //TODO-SVE: This only handles vector lengths of 128bits. 
+ //TODO-SVE: This only handles vector lengths of 128bits. - _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); + // If this is hit then the kernel does not think the SVE registers are live yet. + _ASSERT(sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))); - uint16_t vq = sve_vq_from_vl(lpContext->Vl); + uint16_t vq = sve_vq_from_vl(lpContext->Vl); - // Vector length should not have changed. - _ASSERTE(lpContext->Vl == sve->vl); + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - for (int i = 0; i < 16; i++) - { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; - } - } - else + for (int i = 0; i < 16; i++) { - // If this happens that somehow lpContext is filled with SVE state, but the kernel - // thinks SVE is not live yet. - _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == 0); + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; } } #endif // TARGET_OSX @@ -1059,29 +1049,43 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->V[i] = *(NEON128*) &fp->vregs[i]; } } - // Sve context may be present when SVE registers are not live. If so then - // there is no SVE data to save. 
- if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + + if (sve) { //TODO-SVE: This only handles vector lengths of 128bits. - lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; - - uint16_t vq = sve_vq_from_vl(sve->vl); - _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); lpContext->Vl = sve->vl; - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + if (sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + uint16_t vq = sve_vq_from_vl(sve->vl); - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; - for (int i = 0; i < 16; i++) + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + } + } + else { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + // Sve context is not present due to SVE registers not being live. 
+ + lpContext->XStateFeaturesMask = 0; + lpContext->Ffr = 0; + + for (int i = 0; i < 16; i++) + { + lpContext->P[i] = 0; + } } } #endif // TARGET_OSX From 8be931bb0adab9f7e1e4b6f4936a157f6c7f8166 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 08:08:00 +0100 Subject: [PATCH 24/29] Move context handling inside XSTATE checks --- src/coreclr/pal/inc/pal.h | 2 + src/coreclr/pal/src/CMakeLists.txt | 5 + src/coreclr/pal/src/arch/arm64/asmconstants.h | 5 +- src/coreclr/pal/src/arch/arm64/context2.S | 20 ++- src/coreclr/pal/src/include/pal/context.h | 8 +- src/coreclr/pal/src/thread/context.cpp | 158 +++++++++--------- 6 files changed, 110 insertions(+), 88 deletions(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 8885cde07e896f..0fe847d52e8965 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1844,6 +1844,8 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY { #define CONTEXT_EXCEPTION_REQUEST 0x40000000L #define CONTEXT_EXCEPTION_REPORTING 0x80000000L +#define CONTEXT_XSTATE (CONTEXT_ARM64 | 0x40L) + #define XSTATE_SVE (0) #define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE)) diff --git a/src/coreclr/pal/src/CMakeLists.txt b/src/coreclr/pal/src/CMakeLists.txt index 2398334e899897..28fc8765768a54 100644 --- a/src/coreclr/pal/src/CMakeLists.txt +++ b/src/coreclr/pal/src/CMakeLists.txt @@ -90,6 +90,11 @@ if(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET add_definitions(-DXSTATE_SUPPORTED) endif(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) +if(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) + # Currently the _xstate is not available on Alpine Linux + add_definitions(-DXSTATE_SUPPORTED) +endif(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL) + if(CLR_CMAKE_TARGET_LINUX_MUSL) # Setting RLIMIT_NOFILE breaks debugging of coreclr on Alpine Linux for some 
reason add_definitions(-DDONT_SET_RLIMIT_NOFILE) diff --git a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index 59ef9d879f66ca..9499b3535fc014 100644 --- a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -18,7 +18,10 @@ #define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT) -#define XSTATE_SVE (0) +#define CONTEXT_XSTATE_BIT (6) +#define CONTEXT_XSTATE (1 << CONTEXT_XSTATE_BIT) + +#define XSTATE_SVE_BIT (0) -#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE)) +#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE_BIT)) diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index d0734fec875350..293b765aefb557 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -70,7 +70,6 @@ LOCAL_LABEL(Done_CONTEXT_CONTROL): stp x26, x27, [x0, CONTEXT_X26] str x28, [x0, CONTEXT_X28] - LOCAL_LABEL(Done_CONTEXT_INTEGER): ldr w1, [x0, CONTEXT_ContextFlags] // clangs assembler doesn't seem to support the mov Wx, imm32 yet @@ -105,8 +104,17 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER): sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT): + ldr w1, [x0, CONTEXT_ContextFlags] + // clangs assembler doesn't seem to support the mov Wx, imm32 yet + movz w2, #0x40, lsl #16 + movk w2, #0x40 + mov w3, w2 + and w2, w1, w2 + cmp w2, w3 + b.ne LOCAL_LABEL(Done_CONTEXT_SVE) + ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] - tbz x1, #XSTATE_SVE, LOCAL_LABEL(Done_CONTEXT_SVE) + tbz x1, #XSTATE_SVE_BIT, LOCAL_LABEL(Done_CONTEXT_SVE) add x0, x0, CONTEXT_SVE_OFFSET str p0, [x0, CONTEXT_P0_VL, MUL VL] @@ -151,8 +159,7 @@ LEAF_ENTRY RtlCaptureContext, _TEXT orr w1, w1, #0x4 orr w1, w1, #0x8 str w1, [x0, CONTEXT_ContextFlags] - movz x1, 0 - str x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] + str xzr, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET] ldr x1, [sp] EPILOG_STACK_FREE 16 b
C_FUNC(CONTEXT_CaptureContext) @@ -184,8 +191,11 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT): // since we potentially clobber x0 below, we'll bank it in x16 mov x16, x0 + ldr w17, [x16, CONTEXT_ContextFlags] + tbz w17, #CONTEXT_XSTATE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET] - tbz w17, #XSTATE_SVE, LOCAL_LABEL(No_Restore_CONTEXT_SVE) + tbz w17, #XSTATE_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE) add x16, x16, CONTEXT_SVE_OFFSET ldr p0, [x16, CONTEXT_FFR_VL, MUL VL] diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 4058cb27188290..866a9cd2a053ec 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ b/src/coreclr/pal/src/include/pal/context.h @@ -42,14 +42,14 @@ typedef ucontext_t native_context_t; #if !HAVE_MACH_EXCEPTIONS -#if defined(XSTATE_SUPPORTED) && !HAVE_PUBLIC_XSTATE_STRUCT +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) && !HAVE_PUBLIC_XSTATE_STRUCT namespace asm_sigcontext { #include }; using asm_sigcontext::_fpx_sw_bytes; using asm_sigcontext::_xstate; -#endif // defined(XSTATE_SUPPORTED) && !HAVE_PUBLIC_XSTATE_STRUCT +#endif // XSTATE_SUPPORTED && HOST_AMD64 && !HAVE_PUBLIC_XSTATE_STRUCT #else // !HAVE_MACH_EXCEPTIONS #include @@ -435,7 +435,7 @@ struct sve_context { ///////////////////// // Extended state -#ifdef XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) #if HAVE_FPSTATE_GLIBC_RESERVED1 #define FPSTATE_RESERVED __glibc_reserved1 @@ -626,7 +626,7 @@ inline void *FPREG_Xstate_Hi16Zmm(const ucontext_t *uc, uint32_t *featureSize) _ASSERTE(FPREG_HasAvx512Registers(uc)); return FPREG_Xstate_ExtendedFeature(uc, featureSize, XSTATE_AVX512_ZMM); } -#endif // XSTATE_SUPPORTED +#endif // XSTATE_SUPPORTED && HOST_AMD64 ///////////////////// diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 53051b80d3ce62..0ad59d21b033de 100644 --- 
a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -384,9 +384,9 @@ bool Xstate_IsAvx512Supported() #if !HAVE_MACH_EXCEPTIONS -#ifdef XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) && defined(HOST_AMD64) Xstate_ExtendedFeature Xstate_ExtendedFeatures[Xstate_ExtendedFeatures_Count]; -#endif // XSTATE_SUPPORTED +#endif // XSTATE_SUPPORTED && HOST_AMD64 /*++ Function: @@ -660,6 +660,16 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) #endif // (HAVE_GREGSET_T || HAVE___GREGSET_T) && !HOST_S390X && !HOST_LOONGARCH64 && !HOST_RISCV64 && !HOST_POWERPC64 #endif // !HAVE_FPREGS_WITH_CW +#if defined(HOST_ARM64) && !defined(TARGET_OSX) && !defined(TARGET_FREEBSD) + sve_context* sve = nullptr; + fpsimd_context* fp = nullptr; + if (((lpContext->ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) || + ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE)) + { + GetNativeSigSimdContext(native, &fp, &sve); + } +#endif // HOST_ARM64 && !TARGET_OSX && !TARGET_FREEBSD + if ((lpContext->ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) { #ifdef HOST_AMD64 @@ -708,9 +718,6 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } } #else // TARGET_OSX - fpsimd_context* fp = nullptr; - sve_context* sve = nullptr; - GetNativeSigSimdContext(native, &fp, &sve); if (fp) { fp->fpsr = lpContext->Fpsr; @@ -720,31 +727,6 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) *(NEON128*) &fp->vregs[i] = lpContext->V[i]; } } - - if (sve && (lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE) - { - //TODO-SVE: This only handles vector lengths of 128bits. - - // If this is hit then the kernel does not think the SVE registers are live yet. - _ASSERT(sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))); - - uint16_t vq = sve_vq_from_vl(lpContext->Vl); - - // Vector length should not have changed. 
- _ASSERTE(lpContext->Vl == sve->vl); - - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; - - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - - for (int i = 0; i < 16; i++) - { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; - } - } #endif // TARGET_OSX #elif defined(HOST_ARM) VfpSigFrame* fp = GetNativeSigSimdContext(native); @@ -797,9 +779,10 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) } // TODO: Enable for all Unix systems -#if defined(HOST_AMD64) && defined(XSTATE_SUPPORTED) +#if defined(XSTATE_SUPPORTED) if ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { +#if defined(HOST_AMD64) if (FPREG_HasYmmRegisters(native)) { _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_AVX) == XSTATE_MASK_AVX); @@ -828,8 +811,34 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) memcpy_s(dest, sizeof(M512) * 16, &lpContext->Zmm16, sizeof(M512) * 16); } } +#elif defined(HOST_ARM64) + if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + //TODO-SVE: This only handles vector lengths of 128bits. + + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); + + uint16_t vq = sve_vq_from_vl(lpContext->Vl); + + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. 
+ *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + } + } +#endif //HOST_AMD64 } -#endif //HOST_AMD64 && XSTATE_SUPPORTED +#endif //XSTATE_SUPPORTED + } #if defined(HOST_64BIT) && defined(HOST_ARM64) && !defined(TARGET_FREEBSD) && !defined(TARGET_OSX) @@ -863,7 +872,6 @@ void _GetNativeSigSimdContext(uint8_t *data, uint32_t size, fpsimd_context **fp_ _ASSERTE(position + ctx->size <= size); - switch (ctx->magic) { case FPSIMD_MAGIC: @@ -990,6 +998,16 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex #endif // (HAVE_GREGSET_T || HAVE___GREGSET_T) && !HOST_S390X && !HOST_LOONGARCH64 && !HOST_RISCV64 && !HOST_POWERPC64 && !HOST_POWERPC64 #endif // !HAVE_FPREGS_WITH_CW +#if defined(HOST_ARM64) && !defined(TARGET_OSX) && !defined(TARGET_FREEBSD) + const fpsimd_context* fp = nullptr; + const sve_context* sve = nullptr; + if (((lpContext->ContextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) || + ((lpContext->ContextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE)) + { + GetConstNativeSigSimdContext(native, &fp, &sve); + } +#endif // HOST_ARM64 && !TARGET_OSX && !TARGET_FREEBSD + if ((contextFlags & CONTEXT_FLOATING_POINT) == CONTEXT_FLOATING_POINT) { #ifdef HOST_AMD64 @@ -1037,9 +1055,6 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex } } #else // TARGET_OSX - const fpsimd_context* fp = nullptr; - const sve_context* sve = nullptr; - GetConstNativeSigSimdContext(native, &fp, &sve); if (fp) { lpContext->Fpsr = fp->fpsr; @@ -1049,45 +1064,6 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->V[i] = *(NEON128*) &fp->vregs[i]; } } - - if (sve) - { - //TODO-SVE: This only handles vector lengths of 128bits. 
- - _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); - lpContext->Vl = sve->vl; - - if (sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) - { - uint16_t vq = sve_vq_from_vl(sve->vl); - - lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; - - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); - - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - - for (int i = 0; i < 16; i++) - { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); - } - } - else - { - // Sve context is not present due to SVE registers not being live. - - lpContext->XStateFeaturesMask = 0; - lpContext->Ffr = 0; - - for (int i = 0; i < 16; i++) - { - lpContext->P[i] = 0; - } - } - } #endif // TARGET_OSX #elif defined(HOST_ARM) const VfpSigFrame* fp = GetConstNativeSigSimdContext(native); @@ -1146,11 +1122,12 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex #endif } -#ifdef HOST_AMD64 +#if defined(HOST_AMD64) || defined(HOST_ARM64) if ((contextFlags & CONTEXT_XSTATE) == CONTEXT_XSTATE) { // TODO: Enable for all Unix systems -#if XSTATE_SUPPORTED +#if defined(XSTATE_SUPPORTED) +#if defined(HOST_AMD64) if (FPREG_HasYmmRegisters(native)) { uint32_t size; @@ -1179,6 +1156,31 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->XStateFeaturesMask |= XSTATE_MASK_AVX512; } } +#elif defined(HOST_ARM64) + if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) + { + //TODO-SVE: This only handles vector lengths of 128bits. + + _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); + lpContext->Vl = sve->vl; + + uint16_t vq = sve_vq_from_vl(sve->vl); + + lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; + + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. 
+ lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + } + } +#endif // HOST_AMD64 else #endif // XSTATE_SUPPORTED { @@ -1188,7 +1190,7 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex lpContext->ContextFlags &= ~xstateFlags; } } -#endif // HOST_AMD64 +#endif // HOST_AMD64 || HOST_ARM64 } #if !HAVE_MACH_EXCEPTIONS From 6fb91415468216ac73f93e1b7186af5efedf0c44 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 11:15:32 +0100 Subject: [PATCH 25/29] Set CONTEXT_XSTATE --- src/coreclr/pal/src/exception/signal.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/pal/src/exception/signal.cpp b/src/coreclr/pal/src/exception/signal.cpp index c9112432f8d930..fa79e8a440482e 100644 --- a/src/coreclr/pal/src/exception/signal.cpp +++ b/src/coreclr/pal/src/exception/signal.cpp @@ -824,7 +824,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; -#if defined(HOST_AMD64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) contextFlags |= CONTEXT_XSTATE; #endif @@ -1005,7 +1005,7 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT; -#if defined(HOST_AMD64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) contextFlags |= CONTEXT_XSTATE; #endif From d2c2e100e0838cacbdafcf84a143bb0d02f7a094 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 12:27:21 +0100 Subject: [PATCH 26/29] Remove __pad and fix sizes --- .../Runtime/ExceptionServices/AsmOffsets.cs | 14 
++++++------- src/coreclr/pal/inc/pal.h | 5 ++--- src/coreclr/pal/src/arch/arm64/asmconstants.h | 20 +++++++++++++------ src/coreclr/vm/arm64/asmconstants.h | 2 +- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 88bfa97bcfda57..e7f82fb0ea4ac0 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -26,13 +26,13 @@ class AsmOffsets #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0xa00; - public const int OFFSETOF__REGDISPLAY__SP = 0x958; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x960; + public const int SIZEOF__REGDISPLAY = 0x9e0; + public const int OFFSETOF__REGDISPLAY__SP = 0x938; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x940; - public const int OFFSETOF__REGDISPLAY__SP = 0x898; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; + public const int SIZEOF__REGDISPLAY = 0x920; + public const int OFFSETOF__REGDISPLAY__SP = 0x878; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x880; #endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; @@ -126,7 +126,7 @@ class AsmOffsets #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX - public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3f0; + public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0; #else // TARGET_UNIX public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390; #endif // TARGET_UNIX diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 0fe847d52e8965..05c27cf185bb61 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1958,9 +1958,8 @@ typedef struct 
DECLSPEC_ALIGN(16) _CONTEXT { // // TODO-SVE: Support Vector register sizes >128bit // For 128bit, Z and V registers fully overlap, so there is no need to load/store both. - /* +0x390 */ DWORD Vl; - /* +0x394 */ DWORD Ffr; - /* +0x398 */ DWORD64 __pad; // Ensure 128bit alignment + /* +0x398 */ DWORD Vl; + /* +0x39c */ DWORD Ffr; /* +0x3a0 */ DWORD P[16]; /* +0x3e0 */ diff --git a/src/coreclr/pal/src/arch/arm64/asmconstants.h b/src/coreclr/pal/src/arch/arm64/asmconstants.h index 9499b3535fc014..d6379a28ce8013 100644 --- a/src/coreclr/pal/src/arch/arm64/asmconstants.h +++ b/src/coreclr/pal/src/arch/arm64/asmconstants.h @@ -97,17 +97,22 @@ #define CONTEXT_V31 CONTEXT_V30+16 #define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16 #define CONTEXT_Fpcr 0 -#define CONTEXT_Fpsr CONTEXT_Fpcr+8 +#define CONTEXT_Fpsr CONTEXT_Fpcr+4 +#define CONTEXT_NEON_SIZE CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+4 -#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+8 +#define CONTEXT_DEBUG_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_NEON_SIZE +#define CONTEXT_DEBUG_SIZE 120 // (8*4)+(8*8)+(2*4)+(2*8) + +#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_DEBUG_OFFSET+CONTEXT_DEBUG_SIZE // TODO-SVE: Support Vector register sizes >128bit -// SVE register offsets are multiples of the vector length -// For 128bit, Z and V registers fully overlap, so there is no need to load/store both. 
#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8 -#define CONTEXT_FFR_VL 0 +#define CONTEXT_VL_OFFSET 0 +// SVE register offsets are multiples of the vector length +#define CONTEXT_SVE_REGS_OFFSET CONTEXT_VL_OFFSET+4 +#define CONTEXT_FFR_VL 0 #define CONTEXT_P0_VL CONTEXT_FFR_VL+1 #define CONTEXT_P1_VL CONTEXT_P0_VL+1 #define CONTEXT_P2_VL CONTEXT_P1_VL+1 @@ -125,6 +130,9 @@ #define CONTEXT_P14_VL CONTEXT_P13_VL+1 #define CONTEXT_P15_VL CONTEXT_P14_VL+1 -#define CONTEXT_Size CONTEXT_SVE_OFFSET + ((CONTEXT_P15_VL+1) * 4) +#define CONTEXT_SVE_REGS_SIZE ((CONTEXT_P15_VL+1) * 4) +#define CONTEXT_SVE_SIZE CONTEXT_SVE_REGS_SIZE + 4 + +#define CONTEXT_Size CONTEXT_SVE_OFFSET + CONTEXT_SVE_SIZE #endif diff --git a/src/coreclr/vm/arm64/asmconstants.h b/src/coreclr/vm/arm64/asmconstants.h index 3cafbfb719f380..eb2462f8ec78e3 100644 --- a/src/coreclr/vm/arm64/asmconstants.h +++ b/src/coreclr/vm/arm64/asmconstants.h @@ -122,7 +122,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__GSCookie == sizeof(GSCookie)); ASMCONSTANTS_C_ASSERT(SIZEOF__Frame == sizeof(Frame)); #if !defined(HOST_WINDOWS) -#define SIZEOF__CONTEXT 0x3f0 +#define SIZEOF__CONTEXT 0x3e0 #else #define SIZEOF__CONTEXT 0x390 #endif From f0a1dba889e83345734a63a6d3f5858352b9f35a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 15:20:35 +0100 Subject: [PATCH 27/29] Fix context sizes --- .../System/Runtime/ExceptionServices/AsmOffsets.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index e7f82fb0ea4ac0..4ddd0bb1da0793 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -20,9 +20,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__SP = 0x1a78; public
const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a80; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0xbf0; - public const int OFFSETOF__REGDISPLAY__SP = 0xbd8; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; + public const int SIZEOF__REGDISPLAY = 0x940; + public const int OFFSETOF__REGDISPLAY__SP = 0x898; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX @@ -78,9 +78,9 @@ class AsmOffsets #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x9f0; - public const int OFFSETOF__REGDISPLAY__SP = 0x950; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x958; + public const int SIZEOF__REGDISPLAY = 0x9d0; + public const int OFFSETOF__REGDISPLAY__SP = 0x930; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938; #else // TARGET_UNIX public const int SIZEOF__REGDISPLAY = 0x930; public const int OFFSETOF__REGDISPLAY__SP = 0x890; From 178e266382972288997132c44705823abe63aeaa Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 15:26:43 +0100 Subject: [PATCH 28/29] Fix context sizes --- .../System/Runtime/ExceptionServices/AsmOffsets.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 4ddd0bb1da0793..2a8a462c977b8f 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -20,9 +20,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__SP = 0x1a78; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1a80; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x940; - public const int OFFSETOF__REGDISPLAY__SP = 0x898; - public const int 
OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; + public const int SIZEOF__REGDISPLAY = 0xbf0; + public const int OFFSETOF__REGDISPLAY__SP = 0xbd8; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX #elif TARGET_ARM64 #if TARGET_UNIX @@ -30,9 +30,9 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__SP = 0x938; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x920; - public const int OFFSETOF__REGDISPLAY__SP = 0x878; - public const int OFFSETOF__REGDISPLAY__ControlPC = 0x880; + public const int SIZEOF__REGDISPLAY = 0x940; + public const int OFFSETOF__REGDISPLAY__SP = 0x898; + public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0; #endif // TARGET_UNIX #elif TARGET_ARM public const int SIZEOF__REGDISPLAY = 0x410; From 29933a8febe43933a9165bbbbea8aaad175d5e5b Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 28 Jun 2024 22:32:21 +0100 Subject: [PATCH 29/29] Only read/write OS context SVE registers on 128bit --- src/coreclr/pal/src/arch/arm64/context2.S | 9 ++++ src/coreclr/pal/src/include/pal/context.h | 16 +++++++ src/coreclr/pal/src/thread/context.cpp | 56 ++++++++++++----------- 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/coreclr/pal/src/arch/arm64/context2.S b/src/coreclr/pal/src/arch/arm64/context2.S index 293b765aefb557..4bfde2f19fbcb4 100644 --- a/src/coreclr/pal/src/arch/arm64/context2.S +++ b/src/coreclr/pal/src/arch/arm64/context2.S @@ -295,3 +295,12 @@ LEAF_ENTRY RestoreCompleteContext, _TEXT LEAF_END RestoreCompleteContext, _TEXT #endif // __APPLE__ + +// Incoming: +// None +// +.arch_extension sve + LEAF_ENTRY CONTEXT_GetSveLengthFromOS, _TEXT + rdvl x0, 1 + ret lr + LEAF_END CONTEXT_GetSveLengthFromOS, _TEXT diff --git a/src/coreclr/pal/src/include/pal/context.h b/src/coreclr/pal/src/include/pal/context.h index 866a9cd2a053ec..932b40c52a821d 100644 --- a/src/coreclr/pal/src/include/pal/context.h +++ 
b/src/coreclr/pal/src/include/pal/context.h @@ -1544,6 +1544,22 @@ DWORD CONTEXTGetExceptionCodeForSignal(const siginfo_t *siginfo, #endif // HAVE_MACH_EXCEPTIONS else +#if defined(HOST_ARM64) +/*++ +Function : + CONTEXT_GetSveLengthFromOS + + Gets the SVE vector length +Parameters : + None +Return value : + The SVE vector length in bytes +--*/ +DWORD64 +CONTEXT_GetSveLengthFromOS( + ); +#endif // HOST_ARM64 + #ifdef __cplusplus } #endif // __cplusplus diff --git a/src/coreclr/pal/src/thread/context.cpp b/src/coreclr/pal/src/thread/context.cpp index 0ad59d21b033de..48fd7e94d3c3d5 100644 --- a/src/coreclr/pal/src/thread/context.cpp +++ b/src/coreclr/pal/src/thread/context.cpp @@ -815,24 +815,26 @@ void CONTEXTToNativeContext(CONST CONTEXT *lpContext, native_context_t *native) if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) { //TODO-SVE: This only handles vector lengths of 128bits. + if (CONTEXT_GetSveLengthFromOS() == 16) + { + _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); - _ASSERT((lpContext->XStateFeaturesMask & XSTATE_MASK_SVE) == XSTATE_MASK_SVE); - - uint16_t vq = sve_vq_from_vl(lpContext->Vl); + uint16_t vq = sve_vq_from_vl(lpContext->Vl); - // Vector length should not have changed. - _ASSERTE(lpContext->Vl == sve->vl); + // Vector length should not have changed. + _ASSERTE(lpContext->Vl == sve->vl); - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)) = lpContext->Ffr; - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - for (int i = 0; i < 16; i++) - { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. 
- *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)) = lpContext->P[i]; + } } } #endif //HOST_AMD64 @@ -1160,24 +1162,26 @@ void CONTEXTFromNativeContext(const native_context_t *native, LPCONTEXT lpContex if (sve && sve->head.size >= SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl))) { //TODO-SVE: This only handles vector lengths of 128bits. + if (CONTEXT_GetSveLengthFromOS() == 16) + { + _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); + lpContext->Vl = sve->vl; - _ASSERTE((sve->vl > 0) && (sve->vl % 16 == 0)); - lpContext->Vl = sve->vl; - - uint16_t vq = sve_vq_from_vl(sve->vl); + uint16_t vq = sve_vq_from_vl(sve->vl); - lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; + lpContext->XStateFeaturesMask |= XSTATE_MASK_SVE; - //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. - lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); + //Note: Size of ffr register is SVE_SIG_FFR_SIZE(vq) bytes. + lpContext->Ffr = *(WORD*) (((uint8_t*)sve) + SVE_SIG_FFR_OFFSET(vq)); - //TODO-SVE: Copy SVE registers once they are >128bits - //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. + //TODO-SVE: Copy SVE registers once they are >128bits + //Note: Size of a Z register is SVE_SIG_ZREGS_SIZE(vq) bytes. - for (int i = 0; i < 16; i++) - { - //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. - lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + for (int i = 0; i < 16; i++) + { + //Note: Size of a P register is SVE_SIG_PREGS_SIZE(vq) bytes. + lpContext->P[i] = *(WORD*) (((uint8_t*)sve) + SVE_SIG_PREG_OFFSET(vq, i)); + } } } #endif // HOST_AMD64