Skip to content

Commit

Permalink
ARM64-SVE: Add SVE registers to pal context (#103801)
Browse files Browse the repository at this point in the history
* ARM64-SVE: Add SVE registers to pal context

* fix debug sizes

* Add SVE defines if missing from Linux host

* More missing defines

* More missing defines

* Add cast

* Move SVE registers after debug registers

* Fix dbgtargetcontext

* Remove SVE from debug context

* Move ffr

* Add SVE registers to asmconstants

* Remove Z registers from context

* backup/restore SVE in Context2.S

Change-Id: I2f6bc39068d9fed3f45b548089b144884607d97b

* Remove unused SVE128 struct

* Add XStateFeaturesMask

* restore instrsarm64sve.h changes

* Restore SIZEOF__CONTEXT for windows

* Fix AsmOffsets.cs for windows

* Fix AsmOffsets.cs for windows

* Restore missing ldr

* Check size of SVE data returned from the kernel

* 16 P registers

* Copy context based on XSTATE_MASK_SVE

* Move context handling inside XSTATE checks

* Set CONTEXT_XSTATE

* Remove __pad and fix sizes

* Fix context sizes

* Fix context sizes

* Only read/write OS context SVE registers on 128bit
  • Loading branch information
a74nh authored Jun 29, 2024
1 parent 6f1d8c5 commit 9528c15
Show file tree
Hide file tree
Showing 10 changed files with 457 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@ class AsmOffsets
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0;
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x9e0;
public const int OFFSETOF__REGDISPLAY__SP = 0x938;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x940;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x940;
public const int OFFSETOF__REGDISPLAY__SP = 0x898;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x8a0;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__REGDISPLAY = 0x410;
public const int OFFSETOF__REGDISPLAY__SP = 0x3ec;
Expand Down Expand Up @@ -71,9 +77,15 @@ class AsmOffsets
public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbd8;
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x9d0;
public const int OFFSETOF__REGDISPLAY__SP = 0x930;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x938;
#else // TARGET_UNIX
public const int SIZEOF__REGDISPLAY = 0x930;
public const int OFFSETOF__REGDISPLAY__SP = 0x890;
public const int OFFSETOF__REGDISPLAY__ControlPC = 0x898;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__REGDISPLAY = 0x408;
public const int OFFSETOF__REGDISPLAY__SP = 0x3e8;
Expand Down Expand Up @@ -111,9 +123,13 @@ class AsmOffsets
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0xc20;
#else // TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x4d0;
#endif // TARGET_UNIx
#endif // TARGET_UNIX
#elif TARGET_ARM64
#if TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x3e0;
#else // TARGET_UNIX
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x390;
#endif // TARGET_UNIX
#elif TARGET_ARM
public const int SIZEOF__PAL_LIMITED_CONTEXT = 0x1a0;
#elif TARGET_X86
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/debug/inc/dbgtargetcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,12 @@ typedef DECLSPEC_ALIGN(16) struct {

} DT_CONTEXT;


// Compile-time layout check for the ARM64 debugger context.
// In the first configuration T_CONTEXT has trailing fields that begin at
// XStateFeaturesMask, and DT_CONTEXT is required to end exactly where they start.
// In every other configuration the two structures must be the same size.
#if !defined(CROSS_COMPILE) && !defined(TARGET_WINDOWS)
static_assert(sizeof(DT_CONTEXT) == offsetof(T_CONTEXT, XStateFeaturesMask), "DT_CONTEXT must not include the SVE registers on ARM64");
#else
static_assert(sizeof(DT_CONTEXT) == sizeof(T_CONTEXT), "DT_CONTEXT size must equal the T_CONTEXT size on ARM64");
#endif

#elif defined(DTCONTEXT_IS_LOONGARCH64)

Expand Down
19 changes: 18 additions & 1 deletion src/coreclr/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1844,6 +1844,12 @@ typedef struct _IMAGE_ARM_RUNTIME_FUNCTION_ENTRY {
#define CONTEXT_EXCEPTION_REQUEST 0x40000000L
#define CONTEXT_EXCEPTION_REPORTING 0x80000000L

// ContextFlags value for the extended-state part of the context:
// the CONTEXT_ARM64 architecture bits combined with 0x40.
#define CONTEXT_XSTATE (CONTEXT_ARM64 | 0x40L)

// Bit index assigned to the SVE feature.
// NOTE(review): presumably an index into CONTEXT.XStateFeaturesMask - confirm against the users of XSTATE_MASK_SVE.
#define XSTATE_SVE (0)

// 64-bit mask with only the XSTATE_SVE bit set.
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE))

//
// This flag is set by the unwinder if it has unwound to a call
// site, and cleared whenever it unwinds through a trap frame.
Expand Down Expand Up @@ -1944,7 +1950,18 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
/* +0x338 */ DWORD64 Bvr[ARM64_MAX_BREAKPOINTS];
/* +0x378 */ DWORD Wcr[ARM64_MAX_WATCHPOINTS];
/* +0x380 */ DWORD64 Wvr[ARM64_MAX_WATCHPOINTS];
/* +0x390 */

/* +0x390 */ DWORD64 XStateFeaturesMask;

//
// Sve Registers
//
// TODO-SVE: Support Vector register sizes >128bit
// For 128bit, Z and V registers fully overlap, so there is no need to load/store both.
/* +0x398 */ DWORD Vl;
/* +0x39c */ DWORD Ffr;
/* +0x3a0 */ DWORD P[16];
/* +0x3e0 */

} CONTEXT, *PCONTEXT, *LPCONTEXT;

Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/pal/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ if(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET
add_definitions(-DXSTATE_SUPPORTED)
endif(CLR_CMAKE_HOST_ARCH_AMD64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)

if(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)
# Define XSTATE_SUPPORTED when the host is ARM64 and the target is Linux with a
# non-musl libc; musl targets are excluded by the condition above.
# Currently the _xstate is not available on Alpine Linux
# NOTE(review): confirm that the reason stated above also applies to ARM64.
add_definitions(-DXSTATE_SUPPORTED)
endif(CLR_CMAKE_HOST_ARCH_ARM64 AND CLR_CMAKE_TARGET_LINUX AND NOT CLR_CMAKE_TARGET_LINUX_MUSL)

if(CLR_CMAKE_TARGET_LINUX_MUSL)
# Setting RLIMIT_NOFILE breaks debugging of coreclr on Alpine Linux for some reason
add_definitions(-DDONT_SET_RLIMIT_NOFILE)
Expand Down
47 changes: 45 additions & 2 deletions src/coreclr/pal/src/arch/arm64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@

#define CONTEXT_FULL (CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT)

// Bit index and single-bit mask of the XSTATE flag; context2.S tests this bit
// in the word loaded from CONTEXT_ContextFlags.
#define CONTEXT_XSTATE_BIT (6)
#define CONTEXT_XSTATE (1 << CONTEXT_XSTATE_BIT)

// Bit index of the SVE feature; context2.S tests this bit in the value loaded
// from CONTEXT_XSTATEFEATURESMASK_OFFSET.
#define XSTATE_SVE_BIT (0)

// Mask form of XSTATE_SVE_BIT. It must be built from XSTATE_SVE_BIT: this header
// does not define XSTATE_SVE, so shifting by that name would not expand.
#define XSTATE_MASK_SVE (UI64(1) << (XSTATE_SVE_BIT))


#define CONTEXT_ContextFlags 0
#define CONTEXT_Cpsr CONTEXT_ContextFlags+4
Expand Down Expand Up @@ -54,6 +61,7 @@
#define CONTEXT_Lr CONTEXT_Fp+8
#define CONTEXT_Sp CONTEXT_Lr+8
#define CONTEXT_Pc CONTEXT_Sp+8

#define CONTEXT_NEON_OFFSET CONTEXT_Pc+8
#define CONTEXT_V0 0
#define CONTEXT_V1 CONTEXT_V0+16
Expand Down Expand Up @@ -89,7 +97,42 @@
#define CONTEXT_V31 CONTEXT_V30+16
#define CONTEXT_FLOAT_CONTROL_OFFSET CONTEXT_V31+16
#define CONTEXT_Fpcr 0
#define CONTEXT_Fpsr CONTEXT_Fpcr+8
#define CONTEXT_Size ((CONTEXT_NEON_OFFSET + CONTEXT_Fpsr + 8 + 0xf) & ~0xf)
#define CONTEXT_Fpsr CONTEXT_Fpcr+4
#define CONTEXT_NEON_SIZE CONTEXT_FLOAT_CONTROL_OFFSET+CONTEXT_Fpsr+4

// Debug register area: follows the NEON/floating-point control area.
#define CONTEXT_DEBUG_OFFSET CONTEXT_NEON_OFFSET+CONTEXT_NEON_SIZE
#define CONTEXT_DEBUG_SIZE 120 // (8*4)+(8*8)+(2*4)+(2*8)

// 8-byte XStateFeaturesMask field: follows the debug register area.
#define CONTEXT_XSTATEFEATURESMASK_OFFSET CONTEXT_DEBUG_OFFSET+CONTEXT_DEBUG_SIZE

// TODO-SVE: Support Vector register sizes >128bit

// SVE area: starts right after the 8-byte XStateFeaturesMask.
// CONTEXT_VL_OFFSET is relative to CONTEXT_SVE_OFFSET.
#define CONTEXT_SVE_OFFSET CONTEXT_XSTATEFEATURESMASK_OFFSET+8
#define CONTEXT_VL_OFFSET 0

// SVE register offsets are multiples of the vector length
// The *_VL values below are slot indices (FFR first, then P0..P15) used as the
// immediate of "MUL VL" addressing in context2.S; they are not byte offsets.
// NOTE(review): context2.S adds CONTEXT_SVE_OFFSET alone before indexing and does
// not use CONTEXT_SVE_REGS_OFFSET - confirm which base address is intended.
#define CONTEXT_SVE_REGS_OFFSET CONTEXT_VL_OFFSET+4
#define CONTEXT_FFR_VL 0
#define CONTEXT_P0_VL CONTEXT_FFR_VL+1
#define CONTEXT_P1_VL CONTEXT_P0_VL+1
#define CONTEXT_P2_VL CONTEXT_P1_VL+1
#define CONTEXT_P3_VL CONTEXT_P2_VL+1
#define CONTEXT_P4_VL CONTEXT_P3_VL+1
#define CONTEXT_P5_VL CONTEXT_P4_VL+1
#define CONTEXT_P6_VL CONTEXT_P5_VL+1
#define CONTEXT_P7_VL CONTEXT_P6_VL+1
#define CONTEXT_P8_VL CONTEXT_P7_VL+1
#define CONTEXT_P9_VL CONTEXT_P8_VL+1
#define CONTEXT_P10_VL CONTEXT_P9_VL+1
#define CONTEXT_P11_VL CONTEXT_P10_VL+1
#define CONTEXT_P12_VL CONTEXT_P11_VL+1
#define CONTEXT_P13_VL CONTEXT_P12_VL+1
#define CONTEXT_P14_VL CONTEXT_P13_VL+1
#define CONTEXT_P15_VL CONTEXT_P14_VL+1

// 17 slots (FFR + P0..P15) of 4 bytes each.
#define CONTEXT_SVE_REGS_SIZE ((CONTEXT_P15_VL+1) * 4)
// Whole SVE area: the 4 bytes in front of the register slots plus the slots
// themselves (4 + 68 = 72 bytes), so that CONTEXT_Size ends at 0x3e0.
#define CONTEXT_SVE_SIZE (CONTEXT_SVE_REGS_OFFSET + CONTEXT_SVE_REGS_SIZE)

// Total size of the context described by this header.
#define CONTEXT_Size (CONTEXT_SVE_OFFSET + CONTEXT_SVE_SIZE)

#endif
78 changes: 76 additions & 2 deletions src/coreclr/pal/src/arch/arm64/context2.S
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//
// Implementation of _CONTEXT_CaptureContext for the ARM platform.
// Implementation of _CONTEXT_CaptureContext for the ARM64 platform.
// This function is processor dependent. It is used by exception handling,
// and always applies to the current thread.
//
Expand All @@ -12,6 +12,7 @@
// Incoming:
// x0: Context*
//
.arch_extension sve
LEAF_ENTRY CONTEXT_CaptureContext, _TEXT
PROLOG_STACK_ALLOC 32
.cfi_adjust_cfa_offset 32
Expand Down Expand Up @@ -69,7 +70,6 @@ LOCAL_LABEL(Done_CONTEXT_CONTROL):
stp x26, x27, [x0, CONTEXT_X26]
str x28, [x0, CONTEXT_X28]


LOCAL_LABEL(Done_CONTEXT_INTEGER):
ldr w1, [x0, CONTEXT_ContextFlags]
// clangs assembler doesn't seem to support the mov Wx, imm32 yet
Expand Down Expand Up @@ -104,6 +104,41 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
sub x0, x0, CONTEXT_FLOAT_CONTROL_OFFSET + CONTEXT_NEON_OFFSET

LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT):
// SVE capture: runs only when ContextFlags has every bit of 0x00400040 set
// and the SVE bit is set in XStateFeaturesMask; otherwise skip to Done_CONTEXT_SVE.
ldr w1, [x0, CONTEXT_ContextFlags]
// clangs assembler doesn't seem to support the mov Wx, imm32 yet
movz w2, #0x40, lsl #16
movk w2, #0x40
mov w3, w2
and w2, w1, w2
cmp w2, w3
b.ne LOCAL_LABEL(Done_CONTEXT_SVE)

ldr x1, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
tbz x1, #XSTATE_SVE_BIT, LOCAL_LABEL(Done_CONTEXT_SVE)

// Temporarily rebase x0 onto the SVE area; it is moved back before the label below.
// NOTE(review): slots are addressed with "MUL VL" scaling from CONTEXT_SVE_OFFSET -
// confirm this matches the field layout of the CONTEXT structure.
add x0, x0, CONTEXT_SVE_OFFSET
str p0, [x0, CONTEXT_P0_VL, MUL VL]
str p1, [x0, CONTEXT_P1_VL, MUL VL]
str p2, [x0, CONTEXT_P2_VL, MUL VL]
str p3, [x0, CONTEXT_P3_VL, MUL VL]
str p4, [x0, CONTEXT_P4_VL, MUL VL]
str p5, [x0, CONTEXT_P5_VL, MUL VL]
str p6, [x0, CONTEXT_P6_VL, MUL VL]
str p7, [x0, CONTEXT_P7_VL, MUL VL]
str p8, [x0, CONTEXT_P8_VL, MUL VL]
str p9, [x0, CONTEXT_P9_VL, MUL VL]
str p10, [x0, CONTEXT_P10_VL, MUL VL]
str p11, [x0, CONTEXT_P11_VL, MUL VL]
str p12, [x0, CONTEXT_P12_VL, MUL VL]
str p13, [x0, CONTEXT_P13_VL, MUL VL]
str p14, [x0, CONTEXT_P14_VL, MUL VL]
str p15, [x0, CONTEXT_P15_VL, MUL VL]
// FFR is read through p0 (already saved above) and stored in the FFR slot.
rdffr p0.b
str p0, [x0, CONTEXT_FFR_VL, MUL VL]
// Put the caller's p0 back from its own slot; slot 0 is the FFR slot and
// would leave p0 holding the FFR value.
ldr p0, [x0, CONTEXT_P0_VL, MUL VL]
sub x0, x0, CONTEXT_SVE_OFFSET

LOCAL_LABEL(Done_CONTEXT_SVE):

EPILOG_STACK_FREE 32
ret
Expand All @@ -124,6 +159,7 @@ LEAF_ENTRY RtlCaptureContext, _TEXT
orr w1, w1, #0x4
orr w1, w1, #0x8
str w1, [x0, CONTEXT_ContextFlags]
str xzr, [x0, CONTEXT_XSTATEFEATURESMASK_OFFSET]
ldr x1, [sp]
EPILOG_STACK_FREE 16
b C_FUNC(CONTEXT_CaptureContext)
Expand All @@ -133,6 +169,7 @@ LEAF_END RtlCaptureContext, _TEXT
// x0: Context*
// x1: Exception*
//
.arch_extension sve
LEAF_ENTRY RtlRestoreContext, _TEXT

#ifdef HAS_ADDRESS_SANITIZER
Expand All @@ -154,6 +191,34 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
// since we potentially clobber x0 below, we'll bank it in x16
mov x16, x0

// SVE restore: performed only when the CONTEXT_XSTATE bit is set in ContextFlags
// and the SVE bit is set in XStateFeaturesMask. x16 holds the context pointer
// (banked from x0 above) and w17 is used as scratch.
ldr w17, [x16, CONTEXT_ContextFlags]
tbz w17, #CONTEXT_XSTATE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)

// Only a 32-bit word of the mask is loaded here.
// NOTE(review): sufficient for XSTATE_SVE_BIT (0) assuming a little-endian layout - confirm.
ldr w17, [x16, CONTEXT_XSTATEFEATURESMASK_OFFSET]
tbz w17, #XSTATE_SVE_BIT, LOCAL_LABEL(No_Restore_CONTEXT_SVE)

// Temporarily rebase x16 onto the SVE area; it is moved back after the loads.
add x16, x16, CONTEXT_SVE_OFFSET
// FFR is written through p0, so it is restored first; p0 is reloaded with its
// own saved value immediately afterwards.
ldr p0, [x16, CONTEXT_FFR_VL, MUL VL]
wrffr p0.b
ldr p0, [x16, CONTEXT_P0_VL, MUL VL]
ldr p1, [x16, CONTEXT_P1_VL, MUL VL]
ldr p2, [x16, CONTEXT_P2_VL, MUL VL]
ldr p3, [x16, CONTEXT_P3_VL, MUL VL]
ldr p4, [x16, CONTEXT_P4_VL, MUL VL]
ldr p5, [x16, CONTEXT_P5_VL, MUL VL]
ldr p6, [x16, CONTEXT_P6_VL, MUL VL]
ldr p7, [x16, CONTEXT_P7_VL, MUL VL]
ldr p8, [x16, CONTEXT_P8_VL, MUL VL]
ldr p9, [x16, CONTEXT_P9_VL, MUL VL]
ldr p10, [x16, CONTEXT_P10_VL, MUL VL]
ldr p11, [x16, CONTEXT_P11_VL, MUL VL]
ldr p12, [x16, CONTEXT_P12_VL, MUL VL]
ldr p13, [x16, CONTEXT_P13_VL, MUL VL]
ldr p14, [x16, CONTEXT_P14_VL, MUL VL]
ldr p15, [x16, CONTEXT_P15_VL, MUL VL]
sub x16, x16, CONTEXT_SVE_OFFSET

LOCAL_LABEL(No_Restore_CONTEXT_SVE):
ldr w17, [x16, CONTEXT_ContextFlags]
tbz w17, #CONTEXT_FLOATING_POINT_BIT, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)

Expand Down Expand Up @@ -230,3 +295,12 @@ LEAF_ENTRY RestoreCompleteContext, _TEXT
LEAF_END RestoreCompleteContext, _TEXT

#endif // __APPLE__

// Reads the current SVE vector length.
//
// Incoming:
// None
//
// Returns:
// x0: result of "rdvl x0, 1", i.e. the vector length in bytes multiplied by 1
//
.arch_extension sve
LEAF_ENTRY CONTEXT_GetSveLengthFromOS, _TEXT
rdvl x0, 1
ret lr
LEAF_END CONTEXT_GetSveLengthFromOS, _TEXT
4 changes: 2 additions & 2 deletions src/coreclr/pal/src/exception/signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex

ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;

#if defined(HOST_AMD64)
#if defined(HOST_AMD64) || defined(HOST_ARM64)
contextFlags |= CONTEXT_XSTATE;
#endif

Expand Down Expand Up @@ -1053,7 +1053,7 @@ static bool common_signal_handler(int code, siginfo_t *siginfo, void *sigcontext

ULONG contextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT;

#if defined(HOST_AMD64)
#if defined(HOST_AMD64) || defined(HOST_ARM64)
contextFlags |= CONTEXT_XSTATE;
#endif

Expand Down
Loading

0 comments on commit 9528c15

Please sign in to comment.