diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 107859ec58dc5..16308f0dce921 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -151,7 +151,7 @@ add_subdirectory(tools/aot/jitinterface) if(NOT CLR_CROSS_COMPONENTS_BUILD) # NativeAOT only buildable for a subset of CoreCLR-supported configurations - if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_LOONGARCH64 OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) + if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_LOONGARCH64 OR CLR_CMAKE_HOST_ARCH_RISCV64 OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) add_subdirectory(nativeaot) endif() endif(NOT CLR_CROSS_COMPONENTS_BUILD) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs index ccff78114d4e9..7932ba9300dc3 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs @@ -35,7 +35,7 @@ #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE #define ENREGISTERED_PARAMTYPE_MAXSIZE #elif TARGET_WASM -#elif TARGET_LOONGARCH64 +#elif TARGET_LOONGARCH64 || TARGET_RISCV64 #define CALLDESCR_ARGREGS // CallDescrWorker has ArgumentRegister parameter #define CALLDESCR_FPARGREGS // CallDescrWorker has FloatArgumentRegisters parameter #define ENREGISTERED_RETURNTYPE_MAXSIZE @@ -360,6 +360,60 @@ internal struct ArchitectureConstants public const int STACK_ELEM_SIZE = 8; public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } } +#elif TARGET_RISCV64 + [StructLayout(LayoutKind.Sequential)] + internal struct ReturnBlock + { + private IntPtr returnValue; + private IntPtr returnValue2; + private IntPtr returnValue3; + private IntPtr returnValue4; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct ArgumentRegisters + { + private IntPtr a0; + private IntPtr a1; + private IntPtr a2; + private IntPtr a3; + private IntPtr a4; + private IntPtr a5; + private IntPtr a6; + private IntPtr a7; + public static unsafe int GetOffsetOfa7() + { + return sizeof(IntPtr) * 7; + } + } + + [StructLayout(LayoutKind.Sequential)] + internal struct FloatArgumentRegisters + { + private double fa0; + private double fa1; + private double fa2; + private double fa3; + private double fa4; + private double fa5; + private double fa6; + private double fa7; + } + + internal struct ArchitectureConstants + { + // To avoid corner case bugs, limit maximum size of the arguments with sufficient margin + public const int MAX_ARG_SIZE = 0xFFFFFF; + + public const int NUM_ARGUMENT_REGISTERS = 8; + public const int ARGUMENTREGISTERS_SIZE = NUM_ARGUMENT_REGISTERS * 8; + public const int ENREGISTERED_RETURNTYPE_MAXSIZE = 32; // bytes (four FP registers: fa0, fa1, fa2, and fa3) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE = 16; // bytes (two int registers: a0 and a1) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE_PRIMITIVE = 8; + public const int ENREGISTERED_PARAMTYPE_MAXSIZE = 16; // bytes (max value type size that can be passed by value) + public const int STACK_ELEM_SIZE = 8; + public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } + } #endif // @@ -465,6 +519,20 @@ public static unsafe int GetOffsetOfArgumentRegisters() return 
sizeof(ReturnBlock); } + public IntPtr m_alignmentPad; +#elif TARGET_RISCV64 + public ReturnBlock m_returnBlock; + public static unsafe int GetOffsetOfReturnValuesBlock() + { + return 0; + } + + public ArgumentRegisters m_argumentRegisters; + public static unsafe int GetOffsetOfArgumentRegisters() + { + return sizeof(ReturnBlock); + } + public IntPtr m_alignmentPad; #else #error Portability problem diff --git a/src/coreclr/nativeaot/Directory.Build.props b/src/coreclr/nativeaot/Directory.Build.props index c01756cfc8aba..54d44cd4947fb 100644 --- a/src/coreclr/nativeaot/Directory.Build.props +++ b/src/coreclr/nativeaot/Directory.Build.props @@ -92,6 +92,9 @@ TARGET_64BIT;TARGET_LOONGARCH64;$(DefineConstants) + + TARGET_64BIT;TARGET_RISCV64;$(DefineConstants) + TARGET_WINDOWS;$(DefineConstants) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index d27884dbdf1ff..21059545ed9ba 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -28,9 +28,12 @@ class AsmOffsets static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); static_assert(sizeof(((Array*)0)->m_Length) == sizeof(((String*)0)->m_Length), "The length field of String and Array have different sizes"); +#define TO_STRING(x) #x +#define OFFSET_STRING(cls, member) TO_STRING(offsetof(cls, member)) + +// Macro definition #define PLAT_ASM_OFFSET(offset, cls, member) \ - static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) > 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is smaller than 0x" #offset "."); \ - static_assert((offsetof(cls, member) == 0x##offset) || (offsetof(cls, member) < 0x##offset), "Bad asm offset for '" #cls "." #member "', the actual offset is larger than 0x" #offset "."); + static_assert(offsetof(cls, member) == 0x##offset, "Bad asm offset for '" #cls "." #member "'. 
Actual offset: " OFFSET_STRING(cls, member)); #define PLAT_ASM_SIZEOF(size, cls ) \ static_assert((sizeof(cls) == 0x##size) || (sizeof(cls) > 0x##size), "Bad asm size for '" #cls "', the actual size is smaller than 0x" #size "."); \ diff --git a/src/coreclr/nativeaot/Runtime/CommonMacros.h b/src/coreclr/nativeaot/Runtime/CommonMacros.h index c429057a14058..22077753082af 100644 --- a/src/coreclr/nativeaot/Runtime/CommonMacros.h +++ b/src/coreclr/nativeaot/Runtime/CommonMacros.h @@ -119,7 +119,7 @@ inline bool IS_ALIGNED(T* val, uintptr_t alignment); #define LOG2_PTRSIZE 2 #define POINTER_SIZE 4 -#elif defined(HOST_LOONGARCH64) +#elif defined(HOST_LOONGARCH64) || defined (HOST_RISCV64) #define LOG2_PTRSIZE 3 #define POINTER_SIZE 8 diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 569cf36e84fa5..e5e2625458f81 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -203,6 +203,24 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, pContext->Sp = pPalContext->SP; pContext->Ra = pPalContext->RA; pContext->Pc = pPalContext->IP; +#elif defined(HOST_RISCV64) + pContext->A0 = pPalContext->A0; + pContext->A1 = pPalContext->A1; + pContext->S1 = pPalContext->S1; + pContext->S2 = pPalContext->S2; + pContext->S3 = pPalContext->S3; + pContext->S4 = pPalContext->S4; + pContext->S5 = pPalContext->S5; + pContext->S6 = pPalContext->S6; + pContext->S7 = pPalContext->S7; + pContext->S8 = pPalContext->S8; + pContext->S9 = pPalContext->S9; + pContext->S10 = pPalContext->S10; + pContext->S11 = pPalContext->S11; + pContext->Fp = pPalContext->FP; + pContext->Sp = pPalContext->SP; + pContext->Ra = pPalContext->RA; + pContext->Pc = pPalContext->IP; #elif defined(HOST_WASM) // No registers, no work to do yet #else @@ -295,7 +313,7 @@ EXTERN_C CODE_LOCATION RhpCheckedAssignRefEBPAVLocation; #endif EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1; -#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) +#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation2; #endif @@ -328,7 +346,7 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) (uintptr_t)&RhpCheckedAssignRefEBPAVLocation, #endif (uintptr_t)&RhpByRefAssignRefAVLocation1, -#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) +#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) && !defined(HOST_RISCV64) (uintptr_t)&RhpByRefAssignRefAVLocation2, #endif }; @@ -410,7 +428,7 @@ static uintptr_t UnwindSimpleHelperToCaller( pContext->SetSp(sp+sizeof(uintptr_t)); // pop the stack #elif defined(HOST_ARM) || defined(HOST_ARM64) uintptr_t adjustedFaultingIP = pContext->GetLr(); -#elif defined(HOST_LOONGARCH64) +#elif defined(HOST_LOONGARCH64) || defined(HOST_RISCV64) uintptr_t adjustedFaultingIP = pContext->GetRa(); #else uintptr_t adjustedFaultingIP = 0; // initializing to make the compiler happy diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index f88304a40e868..ec19b0b2ccd77 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -86,6 +86,27 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) return returnKind; } +#elif defined(TARGET_RISCV64) +// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back +C_ASSERT(PTFF_A0_IS_GCREF == ((uint64_t)GCRK_Object << 
31)); +C_ASSERT(PTFF_A0_IS_BYREF == ((uint64_t)GCRK_Byref << 31)); +C_ASSERT(PTFF_A1_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 31)); +C_ASSERT(PTFF_A1_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 31)); + +inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) +{ + // Just need to report GC ref bits here. + // Appropriate PTFF_SAVE_ bits will be added by the frame building routine. + return ((uint64_t)returnKind << 31); +} + +inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) +{ + GCRefKind returnKind = (GCRefKind)((transFrameFlags & ( PTFF_A0_IS_GCREF | PTFF_A0_IS_BYREF | PTFF_A1_IS_GCREF | PTFF_A1_IS_BYREF)) >> 31); + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_A0) && (transFrameFlags & PTFF_SAVE_A1))); + return returnKind; +} + #elif defined(TARGET_AMD64) // Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index 7b5040284eab1..cd5f37e519992 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -335,6 +335,7 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) int64_t distToTarget = ((int64_t)pCode[0] << 38) >> 36; return (uint8_t *)pCode + distToTarget; } + #elif TARGET_LOONGARCH64 uint32_t * pCode = (uint32_t *)pCodeOrg; // is this "addi.d $a0, $a0, 8"? @@ -370,6 +371,45 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) distToTarget += ((((int64_t)pCode[1] & ~0x3ff) << 38) >> 46); return (uint8_t *)((int64_t)pCode + distToTarget); } + +#elif TARGET_RISCV64 + uint32_t * pCode = (uint32_t *)pCodeOrg; + if (pCode[0] == 0x00850513) // Encoding for `addi a0, a0, 8` in 32-bit instruction format + { + // unboxing sequence + unboxingStub = true; + pCode++; + } + // is this an indirect jump? + // auipc t0, imm20; ld t0, imm12(t0); jr t0 + if ((pCode[0] & 0x7f) == 0x17 && // auipc + (pCode[1] & 0x707f) == 0x3003 && // ld with funct3=011 + (pCode[2] & 0x707f) == 0x0067) // jr (jalr with x0 as rd and funct3=000) + { + // Compute the distance to the IAT cell + int64_t distToIatCell = (((int32_t)pCode[0]) >> 12) << 12; // Extract imm20 from auipc + distToIatCell += ((int32_t)pCode[1]) >> 20; // Add imm12 from ld + + uint8_t ** pIatCell = (uint8_t **)(((int64_t)pCode & ~0xfff) + distToIatCell); + return *pIatCell; + } + + // Is this an unboxing stub followed by a relative jump?
+ // auipc t0, imm20; jalr ra, imm12(t0) + else if (unboxingStub && + (pCode[0] & 0x7f) == 0x17 && // auipc opcode + (pCode[1] & 0x707f) == 0x0067) // jalr opcode with funct3=000 + { + // Extract imm20 from auipc + int64_t distToTarget = (((int32_t)pCode[0]) >> 12) << 12; // Extract imm20 (bits 31:12) + + // Extract imm12 from jalr + distToTarget += ((int32_t)pCode[1]) >> 20; // Extract imm12 (bits 31:20) + + // Calculate the final target address relative to PC + return (uint8_t *)((int64_t)pCode + distToTarget); + } + #else UNREFERENCED_PARAMETER(unboxingStub); PORTABILITY_ASSERT("RhGetCodeTarget"); diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 9f613d13a8b1e..fa86e5b050b88 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -531,6 +531,83 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { } } CONTEXT, *PCONTEXT; +#elif defined(TARGET_RISCV64) + +#define CONTEXT_RISCV64 0x01000000L + +#define CONTEXT_CONTROL (CONTEXT_RISCV64 | 0x1L) +#define CONTEXT_INTEGER (CONTEXT_RISCV64 | 0x2L) + +#define RISCV64_MAX_BREAKPOINTS 8 +#define RISCV64_MAX_WATCHPOINTS 2 + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + // + // Control flags. + // + uint32_t ContextFlags; + + // + // Integer registers + // + uint64_t X0; + uint64_t Ra; + uint64_t Sp; + uint64_t Gp; + uint64_t Tp; + uint64_t T0; + uint64_t T1; + uint64_t T2; + uint64_t Fp; + uint64_t S1; + uint64_t A0; + uint64_t A1; + uint64_t A2; + uint64_t A3; + uint64_t A4; + uint64_t A5; + uint64_t A6; + uint64_t A7; + uint64_t S2; + uint64_t S3; + uint64_t S4; + uint64_t S5; + uint64_t S6; + uint64_t S7; + uint64_t S8; + uint64_t S9; + uint64_t S10; + uint64_t S11; + uint64_t T3; + uint64_t T4; + uint64_t T5; + uint64_t T6; + uint64_t Pc; + + // + // Floating Point Registers + // + uint64_t F[32]; + uint32_t Fcsr; + + void SetIp(uintptr_t ip) { Pc = ip; } + void SetArg0Reg(uintptr_t val) { A0 = val; } + void SetArg1Reg(uintptr_t val) { A1 = val; } + uintptr_t GetIp() { return Pc; } + uintptr_t GetRa() { return Ra; } + uintptr_t GetSp() { return Sp; } + + template + void ForEachPossibleObjectRef(F lambda) + { + for (uint64_t* pReg = &X0; pReg <= &T6; pReg++) + lambda((size_t*)pReg); + + // RA can be used as a scratch register + lambda((size_t*)&Ra); + } +} CONTEXT, *PCONTEXT; + #elif defined(HOST_WASM) typedef struct DECLSPEC_ALIGN(8) _CONTEXT { diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index 23db950b6156c..0c117737818d2 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -61,6 +61,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t GetLr() const { return LR; } void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(TARGET_ARM64) uintptr_t FP; uintptr_t LR; @@ -91,6 +92,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t GetLr() const { return LR; } void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(TARGET_LOONGARCH64) uintptr_t FP; uintptr_t RA; @@ -121,6 +123,37 @@ struct PAL_LIMITED_CONTEXT void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } +#elif defined(TARGET_RISCV64) + + uintptr_t FP; + uintptr_t RA; + + uintptr_t A0; + uintptr_t A1; + uintptr_t S1; + uintptr_t S2; + uintptr_t S3; + uintptr_t S4; + uintptr_t S5; + uintptr_t S6; + uintptr_t S7; + uintptr_t S8; + uintptr_t S9; + uintptr_t S10; + uintptr_t S11; + + 
uintptr_t SP; + uintptr_t IP; + + uint64_t F[12]; + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetRa() const { return RA; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(UNIX_AMD64_ABI) // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 uintptr_t IP; diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index f2f208c89174c..9b314f3812a62 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -1,5 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + #include "common.h" #include "gcenv.h" #include "CommonTypes.h" @@ -287,6 +288,53 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF m_HijackedReturnValueKind = retValueKind; } +#elif defined(TARGET_RISCV64) + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_FramePointer); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_RIP); + + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref + + if (pFrame->m_Flags & PTFF_SAVE_S1) { m_RegDisplay.pS1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S2) { m_RegDisplay.pS2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S3) { m_RegDisplay.pS3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S4) { m_RegDisplay.pS4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S5) { m_RegDisplay.pS5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S6) { m_RegDisplay.pS6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S7) { m_RegDisplay.pS7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S8) { m_RegDisplay.pS8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S9) { m_RegDisplay.pS9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S10) { m_RegDisplay.pS10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_S11) { m_RegDisplay.pS11 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_R0) { m_RegDisplay.pR0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_GP) { m_RegDisplay.pGP = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A0) { m_RegDisplay.pA0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A1) { m_RegDisplay.pA1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A2) { m_RegDisplay.pA2 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A3) { m_RegDisplay.pA3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A4) { m_RegDisplay.pA4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A5) { m_RegDisplay.pA5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A6) { m_RegDisplay.pA6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_A7) { m_RegDisplay.pA7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T0) { m_RegDisplay.pT0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T1) { m_RegDisplay.pT1 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T2) { m_RegDisplay.pT2 = pPreservedRegsCursor++; } + 
if (pFrame->m_Flags & PTFF_SAVE_T3) { m_RegDisplay.pT3 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T4) { m_RegDisplay.pT4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T5) { m_RegDisplay.pT5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_T6) { m_RegDisplay.pT6 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_RA) { m_RegDisplay.pRA = pPreservedRegsCursor++; } + + GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); + if (retValueKind != GCRK_Scalar) + { + m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pA0; + m_HijackedReturnValueKind = retValueKind; + } + #else // TARGET_ARM if (pFrame->m_Flags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_RSI) { m_RegDisplay.pRsi = pPreservedRegsCursor++; } @@ -731,6 +779,43 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pR20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R20); m_RegDisplay.pR21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R21); +#elif defined(TARGET_RISCV64) + + // + // preserved regs + // + m_RegDisplay.pS1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S1); + m_RegDisplay.pS2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S2); + m_RegDisplay.pS3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S3); + m_RegDisplay.pS4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S4); + m_RegDisplay.pS5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S5); + m_RegDisplay.pS6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S6); + m_RegDisplay.pS7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S7); + m_RegDisplay.pS8 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S8); + m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S9); + m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S10); + m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, S11); + + // + // scratch regs + // + m_RegDisplay.pR0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R0); + m_RegDisplay.pA0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A0); + m_RegDisplay.pA1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A1); + m_RegDisplay.pA2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A2); + m_RegDisplay.pA3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A3); + m_RegDisplay.pA4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A4); + m_RegDisplay.pA5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A5); + m_RegDisplay.pA6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A6); + m_RegDisplay.pA7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, A7); + m_RegDisplay.pT0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T0); + m_RegDisplay.pT1 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T1); + m_RegDisplay.pT2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T2); + m_RegDisplay.pT3 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T3); + m_RegDisplay.pT4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T4); + m_RegDisplay.pT5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T5); + m_RegDisplay.pT6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, T6); + #else PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); #endif // TARGET_ARM @@ -887,6 +972,20 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSou m_RegDisplay.pR31 = thisFuncletPtrs.pR31; m_RegDisplay.pFP = thisFuncletPtrs.pFP; +#elif defined(TARGET_RISCV64) + m_RegDisplay.pS1 = thisFuncletPtrs.pS1; + m_RegDisplay.pS2 = thisFuncletPtrs.pS2; + m_RegDisplay.pS3 = thisFuncletPtrs.pS3; + m_RegDisplay.pS4 = thisFuncletPtrs.pS4; + m_RegDisplay.pS5 = thisFuncletPtrs.pS5; + m_RegDisplay.pS6 = thisFuncletPtrs.pS6; + m_RegDisplay.pS7 = thisFuncletPtrs.pS7; + m_RegDisplay.pS8 = thisFuncletPtrs.pS8; + m_RegDisplay.pS9 = thisFuncletPtrs.pS9; + m_RegDisplay.pS10 = thisFuncletPtrs.pS10; + m_RegDisplay.pS11 = thisFuncletPtrs.pS11; + m_RegDisplay.pFP = thisFuncletPtrs.pFP; + #elif 
defined(UNIX_AMD64_ABI) // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; @@ -1169,12 +1268,58 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pR30 = SP++; m_RegDisplay.pR31 = SP++; +#elif defined(TARGET_RISCV64) + PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); + + for (int i = 0; i < 32; i++) + { + m_RegDisplay.F[i] = *f++; + } + + SP = (PTR_uintptr_t)f; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 6 : 4; + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pS1 = m_RegDisplay.pS1; + m_funcletPtrs.pS2 = m_RegDisplay.pS2; + m_funcletPtrs.pS3 = m_RegDisplay.pS3; + m_funcletPtrs.pS4 = m_RegDisplay.pS4; + m_funcletPtrs.pS5 = m_RegDisplay.pS5; + m_funcletPtrs.pS6 = m_RegDisplay.pS6; + m_funcletPtrs.pS7 = m_RegDisplay.pS7; + m_funcletPtrs.pS8 = m_RegDisplay.pS8; + m_funcletPtrs.pS9 = m_RegDisplay.pS9; + m_funcletPtrs.pS10 = m_RegDisplay.pS10; + m_funcletPtrs.pS11 = m_RegDisplay.pS11; + m_funcletPtrs.pFP = m_RegDisplay.pFP; + } + + m_RegDisplay.pFP = SP++; + + m_RegDisplay.SetIP(*SP++); + + m_RegDisplay.pS1 = SP++; + m_RegDisplay.pS2 = SP++; + m_RegDisplay.pS3 = SP++; + m_RegDisplay.pS4 = SP++; + m_RegDisplay.pS5 = SP++; + m_RegDisplay.pS6 = SP++; + m_RegDisplay.pS7 = SP++; + m_RegDisplay.pS8 = SP++; + m_RegDisplay.pS9 = SP++; + m_RegDisplay.pS10 = SP++; + m_RegDisplay.pS11 = SP++; + #else SP = (PTR_uintptr_t)(m_RegDisplay.SP); ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif -#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) m_RegDisplay.SetIP(PCODEToPINSTR(*SP++)); #endif @@ -1317,7 +1462,29 @@ struct UniversalTransitionStackFrame private: uintptr_t m_pushedFP; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (fp) uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (ra) - Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) + Fp64 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) + uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) + uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) + uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) + +public: + PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_uintptr_t get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedRA); } + PTR_uintptr_t get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP); + } + +#elif defined(TARGET_RISCV64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the StackPassedArgs. 
+private: + uintptr_t m_pushedRA; // ChildSP+000 CallerSP-0F0 (0x08 bytes) (ra) + uintptr_t m_pushedFP; // ChildSP+008 CallerSP-0E8 (0x08 bytes) (fp) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0E0 (0x80 bytes) (fa0-fa7) uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-060 (0x20 bytes) uintptr_t m_intArgRegs[8]; // ChildSP+0B0 CallerSP-040 (0x40 bytes) (a0-a7) uintptr_t m_stackPassedArgs[1]; // ChildSP+0F0 CallerSP+000 (unknown size) @@ -1409,7 +1576,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk() #define STACK_ALIGN_SIZE 16 #elif defined(TARGET_X86) #define STACK_ALIGN_SIZE 4 -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define STACK_ALIGN_SIZE 16 #elif defined(TARGET_WASM) #define STACK_ALIGN_SIZE 4 @@ -1495,6 +1662,19 @@ void StackFrameIterator::UnwindThrowSiteThunk() m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R30); m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R31); m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); +#elif defined(TARGET_RISCV64) + m_RegDisplay.pS1 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S1); + m_RegDisplay.pS2 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S2); + m_RegDisplay.pS3 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S3); + m_RegDisplay.pS4 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S4); + m_RegDisplay.pS5 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S5); + m_RegDisplay.pS6 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S6); + m_RegDisplay.pS7 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S7); + m_RegDisplay.pS8 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S8); + m_RegDisplay.pS9 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S9); + m_RegDisplay.pS10 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S10); + m_RegDisplay.pS11 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, S11); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #else ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index f174edd4c473b..c7dc3623cb01c 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -191,6 +191,19 @@ class StackFrameIterator PTR_uintptr_t pR30; PTR_uintptr_t pR31; PTR_uintptr_t pFP; +#elif defined(TARGET_RISCV64) + PTR_uintptr_t pS1; + PTR_uintptr_t pS2; + PTR_uintptr_t pS3; + PTR_uintptr_t pS4; + PTR_uintptr_t pS5; + PTR_uintptr_t pS6; + PTR_uintptr_t pS7; + PTR_uintptr_t pS8; + PTR_uintptr_t pS9; + PTR_uintptr_t pS10; + PTR_uintptr_t pS11; + PTR_uintptr_t pFP; #elif defined(UNIX_AMD64_ABI) PTR_uintptr_t pRbp; PTR_uintptr_t pRbx; diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index c8f91a07a2819..9f7211a2ee762 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -24,6 +24,8 @@ #define THUNK_SIZE 16 #elif TARGET_LOONGARCH64 #define THUNK_SIZE 16 +#elif TARGET_RISCV64 +#define THUNK_SIZE 12 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 
#endif @@ -255,6 +257,25 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() *((uint32_t*)pCurrentThunkAddress) = 0x4C000280; pCurrentThunkAddress += 4; +#elif defined(TARGET_RISCV64) + + // auipc t0, %hi(delta) // Load upper immediate with address high bits + // ld t1, %lo(delta)(t0) // Load data from address in (t0 + lower immediate) + // jr t1 // Jump and don't link register + + int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); + uint32_t deltaHi = (delta + 0x800) & 0xfffff000; + uint32_t deltaLo = delta << (32 - 12); + + *((uint32_t*)pCurrentThunkAddress) = 0x00000297 | deltaHi; // auipc + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x0002B303 | deltaLo; // ld + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x00030067; // jr + pCurrentThunkAddress += 4; + #else UNREFERENCED_PARAMETER(pCurrentDataAddress); UNREFERENCED_PARAMETER(pCurrentThunkAddress); diff --git a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h index ece8ae50b379e..ae76c7ba92e2b 100644 --- a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h +++ b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h @@ -13,7 +13,7 @@ typedef uint32_t UIntTarget; typedef uint64_t UIntTarget; #elif defined(TARGET_WASM) typedef uint32_t UIntTarget; -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef uint64_t UIntTarget; #else #error unexpected target architecture diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 21a15a01c905e..28486ae1df5b1 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -399,6 +399,59 @@ enum PInvokeTransitionFrameFlags : uint64_t PTFF_THREAD_ABORT = 0x0000000800000000, // indicates that ThreadAbortException should be thrown when returning from the transition }; +#elif defined(TARGET_RISCV64) +enum PInvokeTransitionFrameFlags : uint64_t +{ + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp) + + // standard preserved registers + PTFF_SAVE_S1 = 0x0000000000000001, + PTFF_SAVE_S2 = 0x0000000000000002, + PTFF_SAVE_S3 = 0x0000000000000004, + PTFF_SAVE_S4 = 0x0000000000000008, + PTFF_SAVE_S5 = 0x0000000000000010, + PTFF_SAVE_S6 = 0x0000000000000020, + PTFF_SAVE_S7 = 0x0000000000000040, + PTFF_SAVE_S8 = 0x0000000000000080, + PTFF_SAVE_S9 = 0x0000000000000100, + PTFF_SAVE_S10 = 0x0000000000000200, + PTFF_SAVE_S11 = 0x0000000000000400, + + PTFF_SAVE_SP = 0x0000000000000800, + + // Scratch registers + PTFF_SAVE_R0 = 0x0000000000001000, + PTFF_SAVE_GP = 0x0000000000002000, + PTFF_SAVE_A0 = 0x0000000000004000, + PTFF_SAVE_A1 = 0x0000000000008000, + PTFF_SAVE_A2 = 0x0000000000010000, + PTFF_SAVE_A3 = 0x0000000000020000, + PTFF_SAVE_A4 = 0x0000000000040000, + PTFF_SAVE_A5 = 0x0000000000080000, + PTFF_SAVE_A6 = 0x0000000000100000, + PTFF_SAVE_A7 = 0x0000000000200000, + PTFF_SAVE_T0 = 0x0000000000400000, + PTFF_SAVE_T1 = 0x0000000000800000, + PTFF_SAVE_T2 = 0x0000000001000000, + PTFF_SAVE_T3 = 0x0000000002000000, + PTFF_SAVE_T4 = 0x0000000004000000, + PTFF_SAVE_T5 = 0x0000000008000000, + PTFF_SAVE_T6 = 0x0000000010000000, + + PTFF_SAVE_FP = 0x0000000020000000, + + PTFF_SAVE_RA = 0x0000000040000000, + + // used by hijack handler to report return value of hijacked method + PTFF_A0_IS_GCREF = 
0x0000000080000000, + PTFF_A0_IS_BYREF = 0x0000000100000000, + PTFF_A1_IS_GCREF = 0x0000000200000000, + PTFF_A1_IS_BYREF = 0x0000000400000000, + + PTFF_THREAD_ABORT = 0x0000000800000000, +}; + #else // TARGET_ARM enum PInvokeTransitionFrameFlags { @@ -460,7 +513,7 @@ struct PInvokeTransitionFrame #else // USE_PORTABLE_HELPERS struct PInvokeTransitionFrame { -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // The FP and LR registers are pushed in different order when setting up frames TgtPTR_Void m_FramePointer; TgtPTR_Void m_RIP; @@ -472,7 +525,7 @@ struct PInvokeTransitionFrame // can be an invalid pointer in universal transition cases (which never need to call GetThread) #ifdef TARGET_ARM64 uint64_t m_Flags; // PInvokeTransitionFrameFlags -#elif TARGET_LOONGARCH64 +#elif TARGET_LOONGARCH64 || TARGET_RISCV64 uint64_t m_Flags; // PInvokeTransitionFrameFlags #else uint32_t m_Flags; // PInvokeTransitionFrameFlags @@ -498,7 +551,7 @@ struct PInvokeTransitionFrame #define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_ARM64) #define OFFSETOF__Thread__m_pTransitionFrame 0x48 -#elif defined(TARGET_LOONGARCH64) +#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_X86) #define OFFSETOF__Thread__m_pTransitionFrame 0x30 diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index cd6f13418b778..b3158741c44d5 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -231,6 +231,57 @@ struct REGDISPLAY inline void SetSP(uintptr_t SP) { this->SP = SP; } }; +#elif defined(TARGET_RISCV64) + +struct REGDISPLAY +{ + PTR_uintptr_t pR0; + PTR_uintptr_t pRA; + + uintptr_t SP; + + PTR_uintptr_t pGP; + PTR_uintptr_t pTP; + PTR_uintptr_t pT0; + PTR_uintptr_t pT1; + PTR_uintptr_t pT2; + PTR_uintptr_t pFP; + PTR_uintptr_t pS1; + PTR_uintptr_t pA0; + PTR_uintptr_t pA1; + PTR_uintptr_t pA2; + PTR_uintptr_t pA3; + PTR_uintptr_t pA4; + PTR_uintptr_t pA5; + PTR_uintptr_t pA6; + PTR_uintptr_t pA7; + PTR_uintptr_t pS2; + PTR_uintptr_t pS3; + PTR_uintptr_t pS4; + PTR_uintptr_t pS5; + PTR_uintptr_t pS6; + PTR_uintptr_t pS7; + PTR_uintptr_t pS8; + PTR_uintptr_t pS9; + PTR_uintptr_t pS10; + PTR_uintptr_t pS11; + PTR_uintptr_t pT3; + PTR_uintptr_t pT4; + PTR_uintptr_t pT5; + PTR_uintptr_t pT6; + + PCODE IP; + + uint64_t F[32]; // Expanded to cover all F registers + + inline PCODE GetIP() { return IP; } + inline uintptr_t GetSP() { return SP; } + inline uintptr_t GetFP() { return *pFP; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetSP(uintptr_t SP) { this->SP = SP; } +}; + #elif defined(TARGET_WASM) struct REGDISPLAY @@ -247,7 +298,7 @@ struct REGDISPLAY inline void SetIP(PCODE IP) { } inline void SetSP(uintptr_t SP) { } }; -#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM || HOST_LOONGARCH64 +#endif typedef REGDISPLAY * PREGDISPLAY; diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S new file mode 100644 index 0000000000000..5caa170b748fe --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/AllocFast.S @@ -0,0 +1,272 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" + +// GC type flags +#define GC_ALLOC_FINALIZE 1 + +// +// Rename fields of nested structs +// +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// a0 == MethodTable + LEAF_ENTRY RhpNewFast, _TEXT + + // a1 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD a1 +#endif + + // + // a0 contains MethodTable pointer + // + lw a2, OFFSETOF__MethodTable__m_uBaseSize(a0) + + // + // a0: MethodTable pointer + // a1: Thread pointer + // a2: base size + // + + // Load potential new object address into t3. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add a2, a2, t3 + ld t4, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a1) + bltu t4, a2, LOCAL_LABEL(RhpNewFast_RarePath) + + // Update the alloc pointer to account for the allocation. + sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a1) + + // Set the new objects MethodTable pointer + sd a0, OFFSETOF__Object__m_pEEType(t3) + + mv a0, t3 + ret + +LOCAL_LABEL(RhpNewFast_RarePath): + mv a1, zero + tail RhpNewObject + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// a0 == MethodTable + LEAF_ENTRY RhpNewFinalizable, _TEXT + li a1, GC_ALLOC_FINALIZE + tail RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// a0 == MethodTable +// a1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME a3 + + // a3: transition frame + + // Preserve the MethodTable in s0 + mv s0, a0 + + li a2, 0 // numElements + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's MethodTable pointer on success. + beq a0, zero, LOCAL_LABEL(NewOutOfMemory) + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state + +LOCAL_LABEL(NewOutOfMemory): + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mv a0, s0 // MethodTable pointer + li a1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + tail C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewObject, _TEXT + +// Allocate a string. +// a0 == MethodTable +// a1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + // Make sure computing the overall allocation size won't overflow + li a2, MAX_STRING_LENGTH + bltu a2, a1, LOCAL_LABEL(StringSizeOverflow) // Branch if a2 < a1 (overflow) + + // Compute overall allocation size (align(base size + (element size * elements), 8)). 
+ li a3, STRING_COMPONENT_SIZE // Load STRING_COMPONENT_SIZE into a3 + slli a2, a1, 1 // a2 = a1 * STRING_COMPONENT_SIZE, where STRING_COMPONENT_SIZE == 2 + addi a2, a2, STRING_BASE_SIZE + 7 // a2 = a2 + STRING_BASE_SIZE + 7 + andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) + + // a0 == MethodTable + // a1 == element count + // a2 == string size + +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_3 +#else + INLINE_GETTHREAD a3 +#endif + + // Load potential new object address into t3. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add a2, a2, t3 + ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) + bltu t3, a2, LOCAL_LABEL(RhNewString_Rare) + + // Reload new object address into t3. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Update the alloc pointer to account for the allocation. + sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Set the new object's MethodTable pointer and element count. + sd a0, OFFSETOF__Object__m_pEEType(t3) + sd a1, OFFSETOF__Array__m_Length(t3) + + // Return the object allocated in a0. + mv a0, t3 + + ret + +LOCAL_LABEL(StringSizeOverflow): + // We get here if the length of the final string object cannot be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // a0 holds MethodTable pointer already + li a1, 1 // Indicate that we should throw OverflowException + tail C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhNewString_Rare): + tail C_FUNC(RhpNewArrayRare) + LEAF_END RhNewString, _TEXT + +// Allocate one-dimensional, zero-based array (SZARRAY). +// a0 == MethodTable +// a1 == element count + LEAF_ENTRY RhpNewArray, _TEXT + + // We want to limit the element count to the non-negative 32-bit int range. + // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst + // case (32-dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + li a2, 0x7fffffff + bltu a2, a1, LOCAL_LABEL(ArraySizeOverflow) // Branch if a2 < a1 (check for overflow) + + lhu a2, OFFSETOF__MethodTable__m_usComponentSize(a0) // Load component size (unsigned 16-bit) + mul a2, a1, a2 // a2 = a1 * component size + lw a3, OFFSETOF__MethodTable__m_uBaseSize(a0) // Load base size (32-bit) + add a2, a2, a3 // a2 = a2 + base size + addi a2, a2, 7 // a2 = a2 + 7 + andi a2, a2, ~0x7 // Clear the bits[2:0] of a2 (align to 8 bytes) + + // a0 == MethodTable + // a1 == element count + // a2 == array size + + INLINE_GETTHREAD a3 + + // Load potential new object address into t3. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add a2, a2, t3 + ld t3, OFFSETOF__Thread__m_eeAllocContext__combined_limit(a3) + bltu t3, a2, LOCAL_LABEL(RhpNewArray_Rare) + + // Reload new object address into t3. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Update the alloc pointer to account for the allocation. 
+ sd a2, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + + // Set the new object's MethodTable pointer and element count. + sd a0, OFFSETOF__Object__m_pEEType(t3) + sd a1, OFFSETOF__Array__m_Length(t3) + + // Return the object allocated in a0. + mv a0, t3 + + ret + +LOCAL_LABEL(ArraySizeOverflow): + // We get here if the size of the final array object cannot be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // a0 holds MethodTable pointer already + li a1, 1 // Indicate that we should throw OverflowException + tail C_FUNC(RhExceptionHandling_FailedAllocation) + +LOCAL_LABEL(RhpNewArray_Rare): + tail C_FUNC(RhpNewArrayRare) + LEAF_END RhpNewArray, _TEXT + +// Allocate one-dimensional, zero-based array (SZARRAY) using the slow path that calls a runtime helper. +// a0 == MethodTable +// a1 == element count +// a2 == array size + Thread::m_alloc_context::alloc_ptr +// a3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from a2. + ld t3, OFFSETOF__Thread__m_alloc_context__alloc_ptr(a3) + sub a2, a2, t3 + + PUSH_COOP_PINVOKE_FRAME a3 + + // Preserve data we will need later into the callee saved registers + mv s0, a0 // Preserve MethodTable + + mv a2, a1 // numElements + li a1, 0 // uFlags + + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + call C_FUNC(RhpGcAlloc) + + // Set the new object's MethodTable pointer and length on success. + beq a0, zero, LOCAL_LABEL(ArrayOutOfMemory) + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state + +LOCAL_LABEL(ArrayOutOfMemory): + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mv a0, s0 // MethodTable Pointer + li a1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + tail C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..00ec9e8284c86 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/AsmOffsetsCpu.h @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. 
+// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(350, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1C, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(348, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(328, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(318, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(320, StackFrameIterator, m_pPreviousTransitionFrame) + +PLAT_ASM_SIZEOF(E8, PAL_LIMITED_CONTEXT) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, A0) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, A1) +PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, S1) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, S2) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, S3) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, S4) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, S5) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, S6) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, S7) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, S8) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, S9) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, S10) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, S11) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(80, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(208, REGDISPLAY) + +PLAT_ASM_OFFSET(10, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(48, REGDISPLAY, pS1) +PLAT_ASM_OFFSET(90, REGDISPLAY, pS2) +PLAT_ASM_OFFSET(98, REGDISPLAY, pS3) +PLAT_ASM_OFFSET(A0, REGDISPLAY, pS4) +PLAT_ASM_OFFSET(A8, REGDISPLAY, pS5) +PLAT_ASM_OFFSET(B0, REGDISPLAY, pS6) +PLAT_ASM_OFFSET(B8, REGDISPLAY, pS7) +PLAT_ASM_OFFSET(C0, REGDISPLAY, pS8) +PLAT_ASM_OFFSET(C8, REGDISPLAY, pS9) +PLAT_ASM_OFFSET(D0, REGDISPLAY, pS10) +PLAT_ASM_OFFSET(D8, REGDISPLAY, pS11) +PLAT_ASM_OFFSET(40, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) +PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S new file mode 100644 index 0000000000000..36707233d18f5 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/ExceptionHandling.S @@ -0,0 +1,839 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +.global RhpTrapThreads + +// ----------------------------------------------------------------------------- +// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) +.macro ALLOC_THROW_FRAME exceptionType + + mv a3, sp + + // Setup a PAL_LIMITED_CONTEXT on the stack { + .if \exceptionType == HARDWARE_EXCEPTION + addi sp, sp, -0x70 + .cfi_adjust_cfa_offset 0x70 + sd a3, 0(sp) // a3 is the SP and a1 is the IP of the fault site + sd a1, 8(sp) + .else + PROLOG_STACK_ALLOC 0x70 + .cfi_adjust_cfa_offset 0x70 + sd a3, 0(sp) // a3 is the SP and ra is the IP of the fault site + sd ra, 8(sp) + .endif + + // Safely using available registers for floating-point saves + fsd fs0, 0x10(sp) + fsd fs1, 0x18(sp) + fsd fs2, 0x20(sp) + fsd fs3, 0x28(sp) + fsd fs4, 0x30(sp) + fsd fs5, 0x38(sp) + fsd fs6, 0x40(sp) + fsd fs7, 0x48(sp) + fsd fs8, 0x50(sp) + fsd fs9, 0x58(sp) + fsd fs10, 0x60(sp) + fsd fs11, 0x68(sp) + + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 0x78 + + sd zero, 0x10(sp) // locations reserved for return value, not used for exception handling + sd zero, 0x18(sp) + PROLOG_SAVE_REG_PAIR s1, s2, 0x20 + PROLOG_SAVE_REG_PAIR s3, s4, 0x30 + PROLOG_SAVE_REG_PAIR s5, s6, 0x40 + PROLOG_SAVE_REG_PAIR s7, s8, 0x50 + PROLOG_SAVE_REG_PAIR s9, s10, 0x60 + PROLOG_SAVE_REG s11, 0x70 + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo +.endm + +// ----------------------------------------------------------------------------- +// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers + .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,ra, #-60! + // is intentional. Above statement would also emit instruction to save + // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, ra, 0x68 + PROLOG_SAVE_REG_PAIR s1, s2, 0x10 + PROLOG_SAVE_REG_PAIR s3, s4, 0x20 + PROLOG_SAVE_REG_PAIR s5, s6, 0x30 + PROLOG_SAVE_REG_PAIR s7, s8, 0x40 + PROLOG_SAVE_REG_PAIR s9, s10, 0x50 + PROLOG_SAVE_REG s11, 0x60 + mv fp, sp + .cfi_def_cfa_register fp + + .if \extraStackSize != 0 + PROLOG_STACK_ALLOC \extraStackSize + .endif + .endm + +// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers. +// It needs to match the value passed to the corresponding +// ALLOC_CALL_FUNCLET_FRAME. 
+.macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR s1, s2, 0x10 + EPILOG_RESTORE_REG_PAIR s3, s4, 0x20 + EPILOG_RESTORE_REG_PAIR s5, s6, 0x30 + EPILOG_RESTORE_REG_PAIR s7, s8, 0x40 + EPILOG_RESTORE_REG_PAIR s9, s10, 0x50 + EPILOG_RESTORE_REG s11, 0x60 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x68 +.endm + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + // Load general-purpose registers that are defined + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) // Load address of pS1 + ld s1, 0(t3) // Load pS1 into s1 + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) // Load address of pS2 + ld s2, 0(t3) // Load pS2 into s2 + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) // Load address of pS3 + ld s3, 0(t3) // Load pS3 into s3 + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) // Load address of pS4 + ld s4, 0(t3) // Load pS4 into s4 + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) // Load address of pS5 + ld s5, 0(t3) // Load pS5 into s5 + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) // Load address of pS6 + ld s6, 0(t3) // Load pS6 into s6 + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) // Load address of pS7 + ld s7, 0(t3) // Load pS7 into s7 + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) // Load address of pS8 + ld s8, 0(t3) // Load pS8 into s8 + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) // Load address of pS9 + ld s9, 0(t3) // Load pS9 into s9 + ld t3, OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) // Load address of pS10 + ld s10, 0(t3) // Load pS10 into s10 + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) // Load address of pS11 + ld s11, 0(t3) // Load pS11 into s11 + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) // Load address of pFP + ld fp, 0(t3) // Load pFP into fp + + // + // Load FP preserved registers + // + addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F // Base address of floating-point registers + fld fs0, 0x40(t3) // Load fs0 + fld fs1, 0x48(t3) // Load fs1 + fld fs2, 0x90(t3) // Load fs2 + fld fs3, 0x98(t3) // Load fs3 + fld fs4, 0xa0(t3) // Load fs4 + fld fs5, 0xa8(t3) // Load fs5 + fld fs6, 0xb0(t3) // Load fs6 + fld fs7, 0xb8(t3) // Load fs7 + fld fs8, 0xc0(t3) // Load fs8 + fld fs9, 0xc8(t3) // Load fs9 + fld fs10, 0xd0(t3) // Load fs10 + fld fs11, 0xd8(t3) // Load fs11 + + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure +.macro SAVE_PRESERVED_REGISTERS regdisplayReg + + // Save general purpose registers + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) + sd s1, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) + sd s2, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) + sd s3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) + sd s4, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) + sd s5, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) + sd s6, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) + sd s7, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) + sd s8, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) + sd s9, 0(t3) + ld t3, 
OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) + sd s10, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) + sd s11, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + sd fp, 0(t3) + + // Save floating-point registers + addi t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fsd fs0, 0x40(t3) + fsd fs1, 0x48(t3) + fsd fs2, 0x90(t3) + fsd fs3, 0x98(t3) + fsd fs4, 0xa0(t3) + fsd fs5, 0xa8(t3) + fsd fs6, 0xb0(t3) + fsd fs7, 0xb8(t3) + fsd fs8, 0xc0(t3) + fsd fs9, 0xc8(t3) + fsd fs10, 0xd0(t3) + fsd fs11, 0xd8(t3) + +.endm + +// ----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure +.macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if _DEBUG + // Create a pattern to store + li a3, 0xbaaddeed + mv t0, a3 + slli a3, a3, 32 + or a3, a3, t0 + + // Store the pattern into each register's location + ld t3, OFFSETOF__REGDISPLAY__pS1(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS2(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS3(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS4(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS5(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS6(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS7(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS8(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS9(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS10(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pS11(\regdisplayReg) + sd a3, 0(t3) + ld t3, OFFSETOF__REGDISPLAY__pFP(\regdisplayReg) + sd a3, 0(t3) +#endif // _DEBUG +.endm + +.macro GetThreadA2 + addi sp, sp, -16 + sd a0, 0(sp) + sd a1, 8(sp) + call C_FUNC(RhpGetThread) + mv a2, a0 + ld a0, 0(sp) + ld a1, 8(sp) + addi sp, sp, 16 +.endm + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: a0[31:0]: exception code of fault +// a1: faulting IP +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + GetThreadA2 + + // Compute address for ExInfo* + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 2 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.HardwareFault + + // Link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // Set the exception context field on the ExInfo + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + + // a0[31:0]: exception code + // a1: ExInfo* + call C_FUNC(RhThrowHwEx) + + ALTERNATE_ENTRY RhpThrowHwEx2 + + // No return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: a0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME 
SOFTWARE_EXCEPTION + + GetThreadA2 + + ld a1, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + beq a1, zero, LOCAL_LABEL(NotHijacked) + + ld a3, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + + // Recompute SP at callsite + addi t3, sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) + bltu a3, t3, LOCAL_LABEL(TailCallWasHijacked) // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + + // Normal case where a valid return address location is hijacked + sd a1, 0(a3) + tail ClearThreadState + +LOCAL_LABEL(TailCallWasHijacked): + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // Stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT. + mv ra, a1 + + // Compute offsets for PAL_LIMITED_CONTEXT + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA)(sp) + sd ra, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)(sp) + +LOCAL_LABEL(ClearThreadState): + + // Clear the Thread's hijack state + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + +LOCAL_LABEL(NotHijacked): + + // Compute the offset for ExInfo + addi a1, sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + sd zero, OFFSETOF__ExInfo__m_exception(a1) // pExInfo->m_exception = null + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) // pExInfo->m_passNumber = 1 + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) // pExInfo->m_idxCurClause = MaxTryRegionIdx + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_kind(a1) // pExInfo->m_kind = ExKind.Throw + + // Link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) // m_pExInfoStackHead = pExInfo + + // Set the exception context field on the ExInfo + addi a2, sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + sd a2, OFFSETOF__ExInfo__m_pExContext(a1) // pExInfo->m_pExContext = pContext + + // a0: exception object + // a1: ExInfo* + call C_FUNC(RhThrowEx) + + ALTERNATE_ENTRY RhpThrowEx2 + + // No return + EMIT_BREAKPOINT + + NESTED_END RhpThrowEx, _TEXT + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + GetThreadA2 + + // a1 <- ExInfo* + addi a1, sp, rsp_offsetof_ExInfo + // pExInfo->m_exception = null + sd zero, OFFSETOF__ExInfo__m_exception(a1) + // init to a deterministic value (ExKind.None) + sb zero, OFFSETOF__ExInfo__m_kind(a1) + // pExInfo->m_passNumber = 1 + li a3, 1 + sb a3, OFFSETOF__ExInfo__m_passNumber(a1) + // pExInfo->m_idxCurClause = MaxTryRegionIdx + addiw a3, zero, -1 + sw a3, OFFSETOF__ExInfo__m_idxCurClause(a1) + + // link the ExInfo into the thread's ExInfo chain + ld a3, OFFSETOF__Thread__m_pExInfoStackHead(a2) + // a0 <- current ExInfo + mv a0, a3 + // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + sd a3, OFFSETOF__ExInfo__m_pPrevExInfo(a1) + // m_pExInfoStackHead = pExInfo + sd a1, OFFSETOF__Thread__m_pExInfoStackHead(a2) + + // set the exception context field on the ExInfo + // a2 <- PAL_LIMITED_CONTEXT* + addi a2, sp, rsp_offsetof_Context + // pExInfo->m_pExContext = pContext + sd a2, 
OFFSETOF__ExInfo__m_pExContext(a1) + + // a0 contains the currently active ExInfo + // a1 contains the address of the new ExInfo + call C_FUNC(RhRethrow) + + ALTERNATE_ENTRY RhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(OBJECTREF exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: a0: exception object +// a1: handler funclet address +// a2: REGDISPLAY* +// a3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + // Allocate space for the call funclet frame + ALLOC_CALL_FUNCLET_FRAME 0x90 + + // Save floating-point registers + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) + + // Save integer registers + sd a0, 0x60(sp) // Save a0 to a3 + sd a1, 0x68(sp) + sd a2, 0x70(sp) + sd a3, 0x78(sp) + sd zero, 0x80(sp) // Make space for local "is_not_handling_thread_abort"; last qword will store the thread obj + +#define rsp_offset_is_not_handling_thread_abort 0x80 +#define rsp_offset_a0 0x60 +#define rsp_offset_a1 0x68 +#define rsp_offset_a2 0x70 +#define rsp_offset_a3 0x78 +#define rsp_CatchFunclet_offset_thread 0x88 + + // Clear the DoNotTriggerGc flag, trashes a4-a6 + call C_FUNC(RhpGetThread) // Call the RhpGetThread function + sd a0, rsp_CatchFunclet_offset_thread(sp) + mv a5, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + ld a2, rsp_offset_a2(sp) + ld a3, rsp_offset_a3(sp) + + ld a4, OFFSETOF__Thread__m_threadAbortException(a5) + sub a4, a4, a0 + sd a4, rsp_offset_is_not_handling_thread_abort(sp) // Non-zero if the exception is not ThreadAbortException + + addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags + + addiw a6, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, a6, (t3) + + // Set preserved regs to the values expected by the funclet + RESTORE_PRESERVED_REGISTERS a2 + + // Trash the values at the old homes to make sure nobody uses them + TRASH_PRESERVED_REGISTERS_STORAGE a2 + + // Call the funclet + // a0 still contains the exception object + jalr a1 // Jump to the handler funclet + + ALTERNATE_ENTRY RhpCallCatchFunclet2 + + // a0 contains resume IP + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call. + + sd a0, rsp_offset_a0(sp) + + SAVE_PRESERVED_REGISTERS a2 + + ld a0, rsp_CatchFunclet_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call C_FUNC(RhpValidateExInfoPop) + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + + RESTORE_PRESERVED_REGISTERS a2 + + ld a0, rsp_offset_a0(sp) // Reload resume IP +#endif + + ld a1, rsp_CatchFunclet_offset_thread(sp) + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
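+ // (INLINE_THREAD_UNHIJACK is expected to write the hijacked return address back through m_ppvHijackedReturnAddressLocation and clear both hijack fields on the Thread, mirroring what RhpThrowEx does explicitly above.)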
+ INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + + ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + +LOCAL_LABEL(PopExInfoLoop): + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beq a3, zero, LOCAL_LABEL(DonePopping) // if (pExInfo == null) { we're done } + blt a3, a2, LOCAL_LABEL(PopExInfoLoop) // if (pExInfo < resume SP} { keep going } + +LOCAL_LABEL(DonePopping): + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // Store the new head on the Thread + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 + + // Use `andi` with the immediate value 0 + andi a6, a3, (1 << TrapThreadsFlags_AbortInProgress_Bit) // Apply the mask directly + beq a6, zero, LOCAL_LABEL(NoAbort) + + ld a3, rsp_offset_is_not_handling_thread_abort(sp) + bne a3, zero, LOCAL_LABEL(NoAbort) + + // It was the ThreadAbortException, so rethrow it + // Reset SP + mv a1, a0 // a1 <- continuation address as exception PC + li a0, STATUS_REDHAWK_THREAD_ABORT + mv sp, a2 + tail C_FUNC(RhpThrowHwEx) + +LOCAL_LABEL(NoAbort): + // Reset SP and jump to continuation address + mv sp, a2 + jalr zero, 0(a0) // Jump to the continuation address + +#undef rsp_offset_is_not_handling_thread_abort +#undef rsp_offset_a0 +#undef rsp_offset_a1 +#undef rsp_offset_a2 +#undef rsp_offset_a3 +#undef rsp_CatchFunclet_offset_thread + + NESTED_END RhpCallCatchFunclet, _TEXT + +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: a0: handler funclet address +// a1: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + // Allocate space for the call funclet frame + ALLOC_CALL_FUNCLET_FRAME 0x80 + + // Save floating-point registers + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) + + // Save integer registers + sd a0, 0x60(sp) // Save a0 to 0x60 + sd a1, 0x68(sp) // Save a1 to 0x68 + +#define rsp_offset_a1 0x68 +#define rsp_FinallyFunclet_offset_thread 0x70 + + // Clear the DoNotTriggerGc flag + call C_FUNC(RhpGetThread) + sd a0, rsp_FinallyFunclet_offset_thread(sp) + mv a2, a0 + ld a0, 0x60(sp) + ld a1, 0x68(sp) + + // Set the DoNotTriggerGc flag + addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags + addiw a3, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, a3, (t3) + + // Restore preserved registers + RESTORE_PRESERVED_REGISTERS a1 + + // Trash the values at the old homes to make sure nobody uses them + TRASH_PRESERVED_REGISTERS_STORAGE a1 + + // Call the funclet + jalr a0 // Jump to the funclet + + ALTERNATE_ENTRY RhpCallFinallyFunclet2 + + ld a1, rsp_offset_a1(sp) // Reload REGDISPLAY pointer + + // Save new values of preserved registers into REGDISPLAY + SAVE_PRESERVED_REGISTERS a1 + + // Restore the DoNotTriggerGc flag + ld a2, rsp_FinallyFunclet_offset_thread(sp) + + addi t3, a2, OFFSETOF__Thread__m_ThreadStateFlags + addiw a3, zero, 16 // Mask value (0x10) + amoor.w a1, a3, (t3) + + // Restore floating-point registers + fld fs0, 0x00(sp) + fld fs1, 0x08(sp) + fld fs2, 0x10(sp) + fld fs3, 0x18(sp) + fld fs4, 0x20(sp) + fld fs5, 0x28(sp) + fld fs6, 0x30(sp) + fld fs7, 0x38(sp) + fld fs8, 0x40(sp) + fld fs9, 0x48(sp) + fld fs10, 0x50(sp) + fld fs11, 0x58(sp) + + // Free call funclet frame + FREE_CALL_FUNCLET_FRAME 0x80 + + // Return + EPILOG_RETURN + +#undef 
rsp_offset_a1 +#undef rsp_FinallyFunclet_offset_thread + + NESTED_END RhpCallFinallyFunclet, _TEXT + +// +// void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: a0: exception object +// a1: filter funclet address +// a2: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + ALLOC_CALL_FUNCLET_FRAME 0x60 + fsd fs0, 0x00(sp) + fsd fs1, 0x08(sp) + fsd fs2, 0x10(sp) + fsd fs3, 0x18(sp) + fsd fs4, 0x20(sp) + fsd fs5, 0x28(sp) + fsd fs6, 0x30(sp) + fsd fs7, 0x38(sp) + fsd fs8, 0x40(sp) + fsd fs9, 0x48(sp) + fsd fs10, 0x50(sp) + fsd fs11, 0x58(sp) + + ld t3, OFFSETOF__REGDISPLAY__pFP(a2) + ld fp, 0(t3) + + // + // call the funclet + // + // a0 still contains the exception object + jalr a1 + + ALTERNATE_ENTRY RhpCallFilterFunclet2 + + fld fs0, 0x00(sp) + fld fs1, 0x08(sp) + fld fs2, 0x10(sp) + fld fs3, 0x18(sp) + fld fs4, 0x20(sp) + fld fs5, 0x28(sp) + fld fs6, 0x30(sp) + fld fs7, 0x38(sp) + fld fs8, 0x40(sp) + fld fs9, 0x48(sp) + fld fs10, 0x50(sp) + fld fs11, 0x58(sp) + + FREE_CALL_FUNCLET_FRAME 0x60 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet, _TEXT + +#ifdef FEATURE_OBJCMARSHAL + +// +// void* FASTCALL RhpCallPropagateExceptionCallback(void* pCallbackContext, void* pCallback, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo, PInvokeTransitionFrame* pPreviousTransitionFrame) +// +// INPUT: a0: callback context +// a1: callback +// a2: REGDISPLAY* +// a3: ExInfo* +// a4: pPreviousTransitionFrame +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler + +#define rsp_offset_a0 0x10 +#define rsp_offset_a1 0x18 +#define rsp_offset_a2 0x20 +#define rsp_offset_a3 0x28 +#define rsp_offset_a4 0x30 +#define rsp_CallPropagationCallback_offset_thread 0x38 + + // Using the NO_FP macro so that the debugger unwinds using SP. + // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, ra, 0x40 + mv fp, sp + sd a0, rsp_offset_a0(sp) // a0 to a4 are stored to restore them anytime + sd a1, rsp_offset_a1(sp) + sd a2, rsp_offset_a2(sp) + sd a3, rsp_offset_a3(sp) + sd a4, rsp_offset_a4(sp) + sd zero, rsp_CallPropagationCallback_offset_thread(sp) // zero makes space to store the thread obj + + // clear the DoNotTriggerGc flag, trashes a4-a6 + call C_FUNC(RhpGetThread) + sd a0, rsp_CallPropagationCallback_offset_thread(sp) + mv a5, a0 + ld a0, rsp_offset_a0(sp) + ld a1, rsp_offset_a1(sp) + ld a2, rsp_offset_a2(sp) + ld a3, rsp_offset_a3(sp) + + addi t3, a5, OFFSETOF__Thread__m_ThreadStateFlags + + addiw a6, zero, -17 // Mask value (0xFFFFFFEF) + amoand.w a4, a6, (t3) + + // set preserved regs to the values expected by the funclet + RESTORE_PRESERVED_REGISTERS a2 + // trash the values at the old homes to make sure nobody uses them + TRASH_PRESERVED_REGISTERS_STORAGE a2 + +#ifdef _DEBUG + // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we + // have to spill all the preserved registers and then refill them after the call.
+ + SAVE_PRESERVED_REGISTERS a2 + + ld a0, rsp_CallPropagationCallback_offset_thread(sp) // a0 <- Thread* + ld a1, rsp_offset_a3(sp) // a1 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + call C_FUNC(RhpValidateExInfoPop) + + ld a2, rsp_offset_a2(sp) // a2 <- REGDISPLAY* + + RESTORE_PRESERVED_REGISTERS a2 +#endif + + ld a1, rsp_CallPropagationCallback_offset_thread(sp) + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. + INLINE_THREAD_UNHIJACK a1, a3, t3 // Thread in a1, trashes a3 and t3 + + ld a3, rsp_offset_a3(sp) // a3 <- current ExInfo* + ld a2, OFFSETOF__REGDISPLAY__SP(a2) // a2 <- resume SP value + +LOCAL_LABEL(Propagate_PopExInfoLoop): + ld a3, OFFSETOF__ExInfo__m_pPrevExInfo(a3) // a3 <- next ExInfo + beqz a3, LOCAL_LABEL(Propagate_DonePopping) // if (pExInfo == null) { we're done } + blt a3, a2, LOCAL_LABEL(Propagate_PopExInfoLoop) // if (pExInfo < resume SP) { keep going } + +LOCAL_LABEL(Propagate_DonePopping): + sd a3, OFFSETOF__Thread__m_pExInfoStackHead(a1) // store the new head on the Thread + + // restore preemptive mode + ld a4, rsp_offset_a4(sp) // pPreviousTransitionFrame + sd a4, OFFSETOF__Thread__m_pTransitionFrame(a1) + + // reset SP and RA and jump to continuation address + ld a0, rsp_offset_a0(sp) // callback context + ld a1, rsp_offset_a1(sp) // callback + ld a2, rsp_offset_a2(sp) // REGDISPLAY* + ld a3, OFFSETOF__REGDISPLAY__pRA(a2) // a3 <- &resume RA value + ld ra, 0(a3) + ld a3, OFFSETOF__REGDISPLAY__SP(a2) // a3 <- resume SP value + mv sp, a3 + jalr zero, 0(a1) + +#undef rsp_offset_a0 +#undef rsp_offset_a1 +#undef rsp_offset_a2 +#undef rsp_offset_a3 +#undef rsp_CallPropagationCallback_offset_thread + + NESTED_END RhpCallPropagateExceptionCallback, _TEXT + +#endif // FEATURE_OBJCMARSHAL diff --git a/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S new file mode 100644 index 0000000000000..f6a5119967172 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/GcProbe.S @@ -0,0 +1,173 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + +#define PROBE_FRAME_SIZE 0xB0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + + // 11 * 8 for callee saved registers + + // 1 * 8 for caller SP + + // 2 * 8 for int returns + + // 4 * 8 for FP returns + +// Define the prolog for setting up the PInvokeTransitionFrame +.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK + + # Define the method prologue, allocating enough stack space for the PInvokeTransitionFrame and saving + # incoming register values into it. 
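+ # Rough sketch of the resulting frame layout (offsets from the new SP), matching the PROBE_FRAME_SIZE breakdown above: + # 0x00: fp/ra pair 0x10: Thread* 0x18: flags bitmask 0x20-0x70: s1-s11 + # 0x78: caller SP 0x80/0x88: a0/a1 0x90-0xa8: fa0-fa3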
+ + # First create PInvokeTransitionFrame + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, PROBE_FRAME_SIZE # Push down stack pointer and store FP (s10) and RA (ra) + + # Slot at sp+0x10 is reserved for Thread * + # Slot at sp+0x18 is reserved for bitmask of saved registers + + # Save callee-saved registers + PROLOG_SAVE_REG_PAIR s1, s2, 0x20 + PROLOG_SAVE_REG_PAIR s3, s4, 0x30 + PROLOG_SAVE_REG_PAIR s5, s6, 0x40 + PROLOG_SAVE_REG_PAIR s7, s8, 0x50 + PROLOG_SAVE_REG_PAIR s9, s10, 0x60 + PROLOG_SAVE_REG s11, 0x70 + + # Slot at sp+0x78 is reserved for caller sp + + # Save the integer return registers + sd a0, 0x80(sp) + sd a1, 0x88(sp) + + # Save the FP return registers + fsd fa0, 0x90(sp) + fsd fa1, 0x98(sp) + fsd fa2, 0xa0(sp) + fsd fa3, 0xa8(sp) + + # Perform the rest of the PInvokeTransitionFrame initialization. + sd \threadReg, OFFSETOF__PInvokeTransitionFrame__m_pThread(sp) # Thread * (unused by stackwalker) + sd \BITMASK, (OFFSETOF__PInvokeTransitionFrame__m_pThread + 8)(sp) # Save the register bitmask passed in by caller + + addi \trashReg, sp, PROBE_FRAME_SIZE # Recover value of caller's SP + sd \trashReg, 0x78(sp) # Save caller's SP + + # Link the frame into the Thread + mv \trashReg, sp + sd \trashReg, OFFSETOF__Thread__m_pDeferredTransitionFrame(\threadReg) + +.endm + +// Define the prolog for removing the PInvokeTransitionFrame +.macro POP_PROBE_FRAME + + // Restore the integer return registers + ld a0, 0x80(sp) + ld a1, 0x88(sp) + + // Restore the FP return registers + fld fa0, 0x90(sp) + fld fa1, 0x98(sp) + fld fa2, 0xa0(sp) + fld fa3, 0xa8(sp) + + // Restore callee saved registers + EPILOG_RESTORE_REG_PAIR s1, s2, 0x20 + EPILOG_RESTORE_REG_PAIR s3, s4, 0x30 + EPILOG_RESTORE_REG_PAIR s5, s6, 0x40 + EPILOG_RESTORE_REG_PAIR s7, s8, 0x50 + EPILOG_RESTORE_REG_PAIR s9, s10, 0x60 + EPILOG_RESTORE_REG s11, 0x70 + + // Restore the frame pointer and return address + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, PROBE_FRAME_SIZE +.endm + +// Fix up the hijacked callstack +.macro FixupHijackedCallstack + + // a2 <- GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else + INLINE_GETTHREAD a2 +#endif + + // Fix the stack by restoring the original return address + ld ra, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + ld t3, (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8)(a2) + + // Clear hijack state + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(a2) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(a2) + sd zero, OFFSETOF__Thread__m_uHijackedReturnValueFlags(a2) +.endm + +// +// GC Probe Hijack target +// +NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler + FixupHijackedCallstack + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a3 + andi t3, a3, 1 << TrapThreadsFlags_TrapThreads_Bit + bnez t3, LOCAL_LABEL(WaitForGC) + jr ra + +LOCAL_LABEL(WaitForGC): + li t6, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_A0 + PTFF_SAVE_A1) + or t3, t3, t6 + tail C_FUNC(RhpWaitForGC) +NESTED_END RhpGcProbeHijack + +.global C_FUNC(RhpThrowHwEx) + +// Wait for GC function +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + PUSH_PROBE_FRAME a2, a3, t3 + + ld a0, OFFSETOF__Thread__m_pDeferredTransitionFrame(a2) + call C_FUNC(RhpWaitForGC2) + + ld a2, OFFSETOF__PInvokeTransitionFrame__m_Flags(sp) + + # Load PTFF_THREAD_ABORT_BIT into a register, using t2 if needed + slli t2, a2, (63-PTFF_THREAD_ABORT_BIT) + srli t2, t2, 63 + bnez t2, LOCAL_LABEL(ThrowThreadAbort) + + .cfi_remember_state + POP_PROBE_FRAME + EPILOG_RETURN + + .cfi_restore_state +LOCAL_LABEL(ThrowThreadAbort): + POP_PROBE_FRAME + li a0, 
STATUS_REDHAWK_THREAD_ABORT + mv a1, ra # Set return address as exception PC + call C_FUNC(RhpThrowHwEx) +NESTED_END RhpWaitForGC + +.global C_FUNC(RhpGcPoll2) + +// GC Poll function +LEAF_ENTRY RhpGcPoll + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a0 + bne a0, zero, C_FUNC(RhpGcPollRare) + jr ra +LEAF_END RhpGcPoll + +// Rare GC Poll function +NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME a0 + call RhpGcPoll2 + POP_COOP_PINVOKE_FRAME + jr ra +NESTED_END RhpGcPollRare + +#ifdef FEATURE_GC_STRESS + +// GC Stress Hijack targets +LEAF_ENTRY RhpGcStressHijack, _TEXT + // Not Yet Implemented (NYI) + EMIT_BREAKPOINT +LEAF_END RhpGcStressHijack, _TEXT + +#endif // FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..c508edfb2ffce --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/InteropThunksHelpers.S @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +#define POINTER_SIZE 0x08 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // + // RhCommonStub + // + // INPUT: tp: thunk's data block + // + // TRASHES: t0, t1, tp + // + LEAF_ENTRY RhCommonStub, _TEXT + // There are arbitrary callers passing arguments with arbitrary signatures. + // Custom calling convention: + // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + +#ifdef FEATURE_EMULATED_TLS + // If using TLS emulation, fetch the TLS data block address + GETTHUNKDATA_ETLS_9 +#else + // Inline function to get TLS variable + INLINE_GET_TLS_VAR t0, C_FUNC(tls_thunkData) +#endif + + // t0 = base address of TLS data + // tp = address of context cell in thunk's data + + // Load the thunk address from the data block and store it in the thread's static storage + ld t1, 0(t0) // Load thunk address into t1 from the TLS base address + sd t1, 0(t0) // Store the thunk address in thread static storage + + // Load the target address from the data block and jump to it + ld t1, POINTER_SIZE(t0) // Load target address into t1 from the data block + jalr t1 // Jump to the target address in t1 + + LEAF_END RhCommonStub, _TEXT + + // + // IntPtr RhGetCommonStubAddress() + // + LEAF_ENTRY RhGetCommonStubAddress, _TEXT + PREPARE_EXTERNAL_VAR RhCommonStub, a0 + jr ra // Return address in ra + LEAF_END RhGetCommonStubAddress, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S new file mode 100644 index 0000000000000..ea5d91a1a1c1f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/MiscStubs.S @@ -0,0 +1,5 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S new file mode 100644 index 0000000000000..96295ab0af779 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/PInvoke.S @@ -0,0 +1,49 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" + +.global RhpTrapThreads + +// +// RhpPInvoke +// +// IN: a0: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The code generator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the code generator must ensure that there are no live GC references in callee saved registers. +// + +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + sd fp, OFFSETOF__PInvokeTransitionFrame__m_FramePointer(a0) + sd ra, OFFSETOF__PInvokeTransitionFrame__m_RIP(a0) + sd sp, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs(a0) + li t0, PTFF_SAVE_SP + sd t0, OFFSETOF__PInvokeTransitionFrame__m_Flags(a0) + + // get TLS global variable address +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD a1 +#endif + + sd a1, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0) + sd a0, OFFSETOF__Thread__m_pTransitionFrame(a1) + jr ra +NESTED_END RhpPInvoke, _TEXT + +LEAF_ENTRY RhpPInvokeReturn, _TEXT + ld t0, OFFSETOF__PInvokeTransitionFrame__m_pThread(a0) + sd zero, OFFSETOF__Thread__m_pTransitionFrame(t0) + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, a5 + + bne a5, zero, 1f // TrapThreadsFlags_None = 0 + jr ra +1: + // passing transition frame pointer in a0 + tail C_FUNC(RhpWaitForGC2) +LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S new file mode 100644 index 0000000000000..6ba375c91f1b1 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/StubDispatch.S @@ -0,0 +1,116 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + + // Macro that generates code to check a single cache entry. + .macro CHECK_CACHE_ENTRY entry + // Load cache entry data into a temporary register + ld t6, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))(t0) + + // Compare with MethodTable* in t1 + bne t1, t6, 0f + + // Load the target address from the cache entry + ld t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)(t0) + + // Jump to the target address in t0 + jr t0 + + 0: + .endm + + // + // Macro that generates a stub consuming a cache with the given number of entries. + // + .macro DEFINE_INTERFACE_DISPATCH_STUB entries + + NESTED_ENTRY RhpInterfaceDispatch\entries, _TEXT, NoHandler + + // t0 holds the indirection cell address. Load the cache pointer. + ld t0, OFFSETOF__InterfaceDispatchCell__m_pCache(a1) // Using a1 as an alternative base register + + // Load the MethodTable from the object instance in a0. + ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries + ld t1, 0(a0) + + .global CurrentEntry + .set CurrentEntry, 0 + + .rept \entries + CHECK_CACHE_ENTRY CurrentEntry + .set CurrentEntry, CurrentEntry + 1 + .endr + + // t0 still contains the indirection cell address. + tail C_FUNC(RhpInterfaceDispatchSlow) + + NESTED_END RhpInterfaceDispatch\entries, _TEXT + + .endm + + // + // Define all the stub routines we currently need.
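+ // Each RhpInterfaceDispatch<N> stub generated below probes N cache entries in order via CHECK_CACHE_ENTRY and tail-calls RhpInterfaceDispatchSlow when every entry misses.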
+ // + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + + // + // Initial dispatch on an interface when we don't have a cache yet. + // + LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + // Trigger an AV if we're dispatching on a null this. + // The exception handling infrastructure is aware of the fact that this is the first + // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here + // to a NullReferenceException at the callsite. + ld zero, 0(a0) + + // Just tail call to the cache miss helper. + tail C_FUNC(RhpInterfaceDispatchSlow) + LEAF_END RhpInitialInterfaceDispatch, _TEXT + + // + // Stub dispatch routine for dispatch to a vtable slot + // + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // t2 contains the interface dispatch cell address. + // Load t3 to point to the vtable offset (which is stored in the m_pCache field). + ld t3, OFFSETOF__InterfaceDispatchCell__m_pCache(t2) + + // Load the MethodTable from the object instance in a0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ld t4, 0(a0) + add t3, t3, t4 + + // Load the target address of the vtable into t3 + ld t3, 0(t3) + + jr t3 + LEAF_END RhpVTableOffsetDispatch, _TEXT + + // + // Cache miss case, call the runtime to resolve the target and update the cache. + // Use universal transition helper to allow an exception to flow out of resolution. + // + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // t2 contains the interface dispatch cell address. + // Calling convention of the universal thunk is: + // t3: target address for the thunk to call + // t4: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, t3 + tail C_FUNC(RhpUniversalTransition_DebugStepTailCall) + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S new file mode 100644 index 0000000000000..45d61e749700f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/UniversalTransition.S @@ -0,0 +1,177 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + .global RhpIntegerTrashValues + .global RhpFpTrashValues +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + +#define COUNT_ARG_REGISTERS (8) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (16) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_RA_SIZE (8) +#define PUSHED_FP_SIZE (8) + +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_RA_SIZE +// PUSHED_FP_SIZE +// + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +// RhpUniversalTransition +// +// At input to this function, a0-7, f0-7 and the stack may contain any number of arguments. +// +// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +// t7 will contain the managed function that is to be called by this transition function +// t8 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// a0 shall contain a pointer to the TransitionBlock +// a1 shall contain the value that was in t8 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0F0 CallerSP+000 +// {IntArgRegs (a0-a7) (0x40 bytes)} ChildSP+0B0 CallerSP-040 +// {ReturnBlock (0x20 bytes)} ChildSP+090 CallerSP-060 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (f0-f7) (0x80 bytes)} ChildSP+010 CallerSP-0E0 +// {PushedRA} ChildSP+008 CallerSP-0E8 +// {PushedFP} ChildSP+000 CallerSP-0F0 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
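+// +// As a cross-check of the constants above: STACK_SIZE = 0x40 (int arg regs) + 0x20 (return block) + 0x80 (FP arg regs) + 8 (RA) + 8 (FP) = 0xF0, which matches the ChildSP+0F0 / CallerSP+000 row in the diagram, and +// DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK = 8 + 8 + 0x80 = 0x90, matching the ReturnBlock at ChildSP+090.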
+// + + .text + + .macro UNIVERSAL_TRANSITION FunctionName + + NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + // Save FP and RA registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, STACK_SIZE + + // Save Floating Point registers + fsd fa0, 0x10(sp) // Save f0 + fsd fa1, 0x18(sp) // Save f1 + fsd fa2, 0x20(sp) // Save f2 + fsd fa3, 0x28(sp) // Save f3 + fsd fa4, 0x30(sp) // Save f4 + fsd fa5, 0x38(sp) // Save f5 + fsd fa6, 0x40(sp) // Save f6 + fsd fa7, 0x48(sp) // Save f7 + + // Space for return buffer data (0x40 bytes) + + // Save argument registers + sd a0, 0xb0(sp) // Save a0 + sd a1, 0xb8(sp) // Save a1 + sd a2, 0xc0(sp) // Save a2 + sd a3, 0xc8(sp) // Save a3 + sd a4, 0xd0(sp) // Save a4 + sd a5, 0xd8(sp) // Save a5 + sd a6, 0xe0(sp) // Save a6 + sd a7, 0xe8(sp) // Save a7 + + // Optionally prepare the values to trash saved argument registers + #ifdef TRASH_SAVED_ARGUMENT_REGISTERS + PREPARE_EXTERNAL_VAR RhpFpTrashValues, a1 + + fld fa0, 0(a1) // Load fa0 from a1 + fld fa1, 0x08(a1) // Load fa1 from a1 + fld fa2, 0x10(a1) // Load fa2 from a1 + fld fa3, 0x18(a1) // Load fa3 from a1 + fld fa4, 0x20(a1) // Load fa4 from a1 + fld fa5, 0x28(a1) // Load fa5 from a1 + fld fa6, 0x30(a1) // Load fa6 from a1 + fld fa7, 0x38(a1) // Load fa7 from a1 + + PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, a1 + + ld a2, 0x10(a1) // Load a2 from a1 + ld a3, 0x18(a1) // Load a3 from a1 + ld a4, 0x20(a1) // Load a4 from a1 + ld a5, 0x28(a1) // Load a5 from a1 + ld a6, 0x30(a1) // Load a6 from a1 + ld a7, 0x38(a1) // Load a7 from a1 + #endif // TRASH_SAVED_ARGUMENT_REGISTERS + + addi a0, sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function + mv a1, t1 // Second parameter to target function + jalr t0, t1, 0 // Jump to the function in t1 + + // Restore the result address from t2 + mv t2, a0 // Move result to t2 + + // Restore Floating Point registers + fld f0, 0x100(sp) // Restore f0 + fld f1, 0x108(sp) // Restore f1 + fld f2, 0x110(sp) // Restore f2 + fld f3, 0x118(sp) // Restore f3 + fld f4, 0x120(sp) // Restore f4 + fld f5, 0x128(sp) // Restore f5 + fld f6, 0x130(sp) // Restore f6 + fld f7, 0x138(sp) // Restore f7 + + // Restore argument registers + ld a0, 0x140(sp) // Restore a0 + ld a1, 0x148(sp) // Restore a1 + ld a2, 0x150(sp) // Restore a2 + ld a3, 0x158(sp) // Restore a3 + ld a4, 0x160(sp) // Restore a4 + ld a5, 0x168(sp) // Restore a5 + ld a6, 0x170(sp) // Restore a6 + ld a7, 0x178(sp) // Restore a7 + + // Restore FP and RA registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, STACK_SIZE + + // Tailcall to the target address in t2 + jalr t2, 0 + + NESTED_END Rhp\FunctionName, _TEXT + + .endm + + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall diff --git a/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S new file mode 100644 index 0000000000000..21457e69fab88 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/riscv64/WriteBarriers.S @@ -0,0 +1,364 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. 
This is used +// during garbage collections to verify that object references were never written to the heap without using a +// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing +// new references to the real heap. Since this cannot be solved perfectly without critical sections around the +// entire update process, we instead update the shadow location and then re-check the real location (as two +// ordered operations) and if there is a disparity we will re-write the shadow location with a special value +// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +#ifdef WRITE_BARRIER_CHECK + + .global g_GCShadow + .global g_GCShadowEnd + + // On entry: + // destReg: location to be updated + // refReg: objectref to be stored + // + // On exit: + // t3,t4: trashed + // other registers are preserved + // + .macro UPDATE_GC_SHADOW destReg, refReg + + // If g_GCShadow is 0, don't perform the check. + la t3, g_GCShadow + ld t3, 0(t3) + beq t3, zero, 1f + li t4, 0 + + // Save destReg since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). + mv t4, \destReg + + // Transform destReg into the equivalent address in the shadow heap. + la t3, g_lowest_address + ld t3, 0(t3) + sub \destReg, \destReg, t3 + bltz \destReg, 0f + + la t3, g_GCShadow + ld t3, 0(t3) + add \destReg, \destReg, t3 + + la t3, g_GCShadowEnd + ld t3, 0(t3) + bgeu \destReg, t3, 0f + + // Update the shadow heap. + sd \refReg, 0(\destReg) + + // The following read must be strongly ordered with respect to the write we have just performed in order to + // prevent race conditions. + fence rw, rw + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + mv t3, t4 + ld t3, 0(t3) + beq t3, \refReg, 0f + + // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we cannot + // guarantee whose shadow update won. + li t3, INVALIDGCVALUE + sd t3, 0(\destReg) + +0: + // Restore original destReg value + mv \destReg, t4 + +1: + .endm + +#else // WRITE_BARRIER_CHECK + + .macro UPDATE_GC_SHADOW destReg, refReg + .endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it is used in the definition of the name of the helper). + +// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +// some interlocked helpers that need an inline barrier. + + // On entry: + // destReg: location to be updated (cannot be t3,t4) + // refReg: objectref to be stored (cannot be t3,t4) + // + // On exit: + // t3,t4: trashed + // + .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we are in a debug build and write barrier checking has been enabled). 
+ UPDATE_GC_SHADOW \destReg, \refReg + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + la t3, g_write_watch_table + ld t3, 0(t3) + beq t3, zero, 2f + srai t5, \destReg, 12 + add t3, t3, t5 // SoftwareWriteWatch::AddressToTableByteIndexShift + lb t4, 0(t3) + bne t4, zero, 2f + li t4, 0xFF + sb t4, 0(t3) +#endif + +2: + // We can skip the card table write if the reference is to + // an object not on the ephemeral segment. + la t3, g_ephemeral_low + ld t3, 0(t3) + blt \refReg, t3, 0f + + la t3, g_ephemeral_high + ld t3, 0(t3) + bgeu \refReg, t3, 0f + + // Set this object's card, if it has not already been set. + la t3, g_card_table + srai t5, \destReg, 11 + add t4, t3, t5 + + // Check that this card has not already been written. Avoiding useless writes is a big win on + // multi-processor systems since it avoids cache thrashing. + lb t3, 0(t4) + li t5, 0xFF + beq t3, t5, 0f + + li t3, 0xFF + sb t3, 0(t4) + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Check if we need to update the card bundle table + la t3, g_card_bundle_table + srai t5, \destReg, 21 + add t4, t3, t5 + lb t3, 0(t4) + li t5, 0xFF + beq t3, t5, 0f + + li t3, 0xFF + sb t3, 0(t4) +#endif + +0: + // Exit label + .endm + + // On entry: + // destReg: location to be updated + // refReg: objectref to be stored + // + // On exit: + // t3, t4: trashed + // + .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg + + // The "check" of this checked write barrier - is destReg + // within the heap? If not, early out. + la t3, g_lowest_address + ld t3, 0(t3) + sltu t4, \destReg, t3 + + la t3, g_highest_address + ld t3, 0(t3) + + // If \destReg >= g_lowest_address, compare \destReg to g_highest_address. + // Otherwise, set the C flag (0x2) to take the next branch. + bnez t4, 1f + bgeu \destReg, t3, 0f + +1: + INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg + +0: + // Exit label + .endm + +// void JIT_ByRefWriteBarrier +// On entry: +// t6 : the source address (points to object reference to write) +// t5 : the destination address (object reference written here) +// +// On exit: +// t6 : incremented by 8 +// t5 : incremented by 8 +// t4 : trashed +// t2, t3 : trashed +// +// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF +// if you add more trashed registers. +// +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 +// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address +LEAF_ENTRY RhpByRefAssignRef, _TEXT + + ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 + ld t5, 0(t6) + addi t6, t6, 8 + j C_FUNC(RhpCheckedAssignRef) + +LEAF_END RhpByRefAssignRef, _TEXT + +// JIT_CheckedWriteBarrier(Object** dst, Object* src) +// +// Write barrier for writes to objects that may reside +// on the managed heap. +// +// On entry: +// t5 : the destination address (LHS of the assignment). +// May not be a heap location (hence the checked). +// t6 : the object reference (RHS of the assignment). +// +// On exit: +// t3, t4 : trashed +// t5 : incremented by 8 +LEAF_ENTRY RhpCheckedAssignRef, _TEXT + + // is destReg within the heap? 
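+ // Addresses outside the [g_lowest_address, g_highest_address) range fall through to NotInHeap below, where the store is performed with no card table or shadow heap bookkeeping.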
+ la t3, g_lowest_address + ld t3, 0(t3) + sltu t4, t6, t3 + + la t3, g_highest_address + ld t3, 0(t3) + sltu t0, t3, t6 + or t4, t0, t4 + beq t4, zero, C_FUNC(RhpAssignRefRiscV64) + +NotInHeap: + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + sd t6, 0(t5) + addi t5, t5, 8 + jalr ra + +LEAF_END RhpCheckedAssignRef, _TEXT + +// JIT_WriteBarrier(Object** dst, Object* src) +// +// Write barrier for writes to objects that are known to +// reside on the managed heap. +// +// On entry: +// t5 : the destination address (LHS of the assignment). +// t6 : the object reference (RHS of the assignment). +// +// On exit: +// t3, t4 : trashed +// t6 : incremented by 8 +LEAF_ENTRY RhpAssignRefRiscV64, _TEXT + + ALTERNATE_ENTRY RhpAssignRefAVLocation + sd t6, 0(t5) + + INSERT_UNCHECKED_WRITE_BARRIER_CORE t5, t6 + + addi t5, t5, 8 + jalr ra + +LEAF_END RhpAssignRefRiscV64, _TEXT + +// Same as RhpAssignRefRiscV64, but with standard ABI. +LEAF_ENTRY RhpAssignRef, _TEXT + mv t5, a0 ; t5 = dst + mv t6, a1 ; t6 = val + j C_FUNC(RhpAssignRefRiscV64) +LEAF_END RhpAssignRef, _TEXT + + +// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +// successful updates. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address + +// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +// +// Interlocked compare exchange on objectref. +// +// On entry: +// a0: pointer to objectref +// a1: exchange value +// a2: comparand +// +// On exit: +// a0: original value of objectref +// t1, t3, t6, t4: trashed +// +LEAF_ENTRY RhpCheckedLockCmpXchg + + mv t1, a2 + ld t0, 0(a0) + beq t0, t1, EndOfExchange + mv t1, t0 + j EndOfExchange + sd a1, 0(a0) + +EndOfExchange: + bne a2, t1, CmpXchgNoUpdate + +DoCardsCmpXchg: + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in a0 and the value in a1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1 + +CmpXchgNoUpdate: + // t1 still contains the original value. + mv a0, t1 + + jalr ra + +LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation +// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address + +// RhpCheckedXchg(Object** destination, Object* value) +// +// Interlocked exchange on objectref. +// +// On entry: +// a0: pointer to objectref +// a1: exchange value +// +// On exit: +// a0: original value of objectref +// t1: trashed +// t3, t6, t4: trashed +// +LEAF_ENTRY RhpCheckedXchg + + ld t1, 0(a0) + sd a1, 0(a0) + +DoCardsXchg: + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in a0 and the value in a1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE a0, a1 + + // t1 still contains the original value. 
+ mv a0, t1 + + jalr ra + +LEAF_END RhpCheckedXchg, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h index 983f17a36aba0..7bf519097e474 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -87,7 +87,7 @@ FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pD return result; } -#if defined(HOST_AMD64) || defined(HOST_ARM64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_RISCV64) FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index a4a9e599af127..ace74dc2d9c9f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -136,6 +136,42 @@ #define MCREG_S8(mc) ((mc).__gregs[31]) #define MCREG_Pc(mc) ((mc).__pc) +#elif defined(HOST_RISCV64) + +#define MCREG_R0(mc) ((mc).__gregs[0]) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Sp(mc) ((mc).__gregs[2]) +#define MCREG_Gp(mc) ((mc).__gregs[3]) +#define MCREG_Tp(mc) ((mc).__gregs[4]) +#define MCREG_T0(mc) ((mc).__gregs[5]) +#define MCREG_T1(mc) ((mc).__gregs[6]) +#define MCREG_T2(mc) ((mc).__gregs[7]) +#define MCREG_Fp(mc) ((mc).__gregs[8]) +#define MCREG_S1(mc) ((mc).__gregs[9]) +#define MCREG_A0(mc) ((mc).__gregs[10]) +#define MCREG_A1(mc) ((mc).__gregs[11]) +#define MCREG_A2(mc) ((mc).__gregs[12]) +#define MCREG_A3(mc) ((mc).__gregs[13]) +#define MCREG_A4(mc) ((mc).__gregs[14]) +#define MCREG_A5(mc) ((mc).__gregs[15]) +#define MCREG_A6(mc) ((mc).__gregs[16]) +#define MCREG_A7(mc) ((mc).__gregs[17]) +#define MCREG_S2(mc) ((mc).__gregs[18]) +#define MCREG_S3(mc) ((mc).__gregs[19]) +#define MCREG_S4(mc) ((mc).__gregs[20]) +#define MCREG_S5(mc) ((mc).__gregs[21]) +#define MCREG_S6(mc) ((mc).__gregs[22]) +#define MCREG_S7(mc) ((mc).__gregs[23]) +#define MCREG_S8(mc) ((mc).__gregs[24]) +#define MCREG_S9(mc) ((mc).__gregs[25]) +#define MCREG_S10(mc) ((mc).__gregs[26]) +#define MCREG_S11(mc) ((mc).__gregs[27]) +#define MCREG_T3(mc) ((mc).__gregs[28]) +#define MCREG_T4(mc) ((mc).__gregs[29]) +#define MCREG_T5(mc) ((mc).__gregs[30]) +#define MCREG_T6(mc) ((mc).__gregs[31]) +#define MCREG_Pc(mc) ((mc).__gregs[0]) + #elif HOST_64BIT #define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP]) #define MCREG_Rsp(mc) ((mc).__gregs[_REG_RSP]) @@ -207,6 +243,42 @@ #define MCREG_S8(mc) ((mc).__gregs[31]) #define MCREG_Pc(mc) ((mc).__pc) +#elif defined(HOST_RISCV64) + +#define MCREG_R0(mc) ((mc).__gregs[0]) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Sp(mc) ((mc).__gregs[2]) +#define MCREG_Gp(mc) ((mc).__gregs[3]) +#define MCREG_Tp(mc) ((mc).__gregs[4]) +#define MCREG_T0(mc) ((mc).__gregs[5]) +#define MCREG_T1(mc) ((mc).__gregs[6]) +#define MCREG_T2(mc) ((mc).__gregs[7]) +#define MCREG_Fp(mc) ((mc).__gregs[8]) +#define MCREG_S1(mc) ((mc).__gregs[9]) +#define MCREG_A0(mc) ((mc).__gregs[10]) +#define MCREG_A1(mc) ((mc).__gregs[11]) +#define MCREG_A2(mc) ((mc).__gregs[12]) +#define MCREG_A3(mc) ((mc).__gregs[13]) +#define MCREG_A4(mc) ((mc).__gregs[14]) +#define MCREG_A5(mc) ((mc).__gregs[15]) +#define MCREG_A6(mc) ((mc).__gregs[16]) +#define MCREG_A7(mc) ((mc).__gregs[17]) 
+#define MCREG_S2(mc) ((mc).__gregs[18]) +#define MCREG_S3(mc) ((mc).__gregs[19]) +#define MCREG_S4(mc) ((mc).__gregs[20]) +#define MCREG_S5(mc) ((mc).__gregs[21]) +#define MCREG_S6(mc) ((mc).__gregs[22]) +#define MCREG_S7(mc) ((mc).__gregs[23]) +#define MCREG_S8(mc) ((mc).__gregs[24]) +#define MCREG_S9(mc) ((mc).__gregs[25]) +#define MCREG_S10(mc) ((mc).__gregs[26]) +#define MCREG_S11(mc) ((mc).__gregs[27]) +#define MCREG_T3(mc) ((mc).__gregs[28]) +#define MCREG_T4(mc) ((mc).__gregs[29]) +#define MCREG_T5(mc) ((mc).__gregs[30]) +#define MCREG_T6(mc) ((mc).__gregs[31]) +#define MCREG_Pc(mc) ((mc).__gregs[0]) + #elif HOST_64BIT #define MCREG_Rip(mc) ((mc).gregs[REG_RIP]) #define MCREG_Rsp(mc) ((mc).gregs[REG_RSP]) @@ -352,6 +424,42 @@ #define MCREG_S8(mc) ((mc).regs[31]) #define MCREG_Pc(mc) ((mc).pc) +#elif defined(HOST_RISCV64) + +#define MCREG_R0(mc) ((mc).regs[0]) +#define MCREG_Ra(mc) ((mc).regs[1]) +#define MCREG_Sp(mc) ((mc).regs[2]) +#define MCREG_Gp(mc) ((mc).regs[3]) +#define MCREG_Tp(mc) ((mc).regs[4]) +#define MCREG_T0(mc) ((mc).regs[5]) +#define MCREG_T1(mc) ((mc).regs[6]) +#define MCREG_T2(mc) ((mc).regs[7]) +#define MCREG_Fp(mc) ((mc).regs[8]) +#define MCREG_S1(mc) ((mc).regs[9]) +#define MCREG_A0(mc) ((mc).regs[10]) +#define MCREG_A1(mc) ((mc).regs[11]) +#define MCREG_A2(mc) ((mc).regs[12]) +#define MCREG_A3(mc) ((mc).regs[13]) +#define MCREG_A4(mc) ((mc).regs[14]) +#define MCREG_A5(mc) ((mc).regs[15]) +#define MCREG_A6(mc) ((mc).regs[16]) +#define MCREG_A7(mc) ((mc).regs[17]) +#define MCREG_S2(mc) ((mc).regs[18]) +#define MCREG_S3(mc) ((mc).regs[19]) +#define MCREG_S4(mc) ((mc).regs[20]) +#define MCREG_S5(mc) ((mc).regs[21]) +#define MCREG_S6(mc) ((mc).regs[22]) +#define MCREG_S7(mc) ((mc).regs[23]) +#define MCREG_S8(mc) ((mc).regs[24]) +#define MCREG_S9(mc) ((mc).regs[25]) +#define MCREG_S10(mc) ((mc).regs[26]) +#define MCREG_S11(mc) ((mc).regs[27]) +#define MCREG_T3(mc) ((mc).regs[28]) +#define MCREG_T4(mc) ((mc).regs[29]) +#define MCREG_T5(mc) ((mc).regs[30]) +#define MCREG_T6(mc) ((mc).regs[31]) +#define MCREG_Pc(mc) ((mc).pc) + #else // For FreeBSD, as found in x86/ucontext.h @@ -516,6 +624,31 @@ MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ MCREG_A1(nativeContext->uc_mcontext) = arg1Reg; +#elif defined(HOST_RISCV64) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Fp, FP) \ + ASSIGN_REG(Ra, RA) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(S1, S1) \ + ASSIGN_REG(S2, S2) \ + ASSIGN_REG(S3, S3) \ + ASSIGN_REG(S4, S4) \ + ASSIGN_REG(S5, S5) \ + ASSIGN_REG(S6, S6) \ + ASSIGN_REG(S7, S7) \ + ASSIGN_REG(S8, S8) \ + ASSIGN_REG(S9, S9) \ + ASSIGN_REG(S10, S10) \ + ASSIGN_REG(S11, S11) + +#define ASSIGN_TWO_ARGUMENT_REGS \ + MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_A1(nativeContext->uc_mcontext) = arg1Reg; + #elif defined(HOST_WASM) // TODO: determine how unwinding will work on WebAssembly #define ASSIGN_CONTROL_REGS @@ -716,6 +849,42 @@ uint64_t GetPC(void* context) uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } // R3 uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } +#elif TARGET_RISCV64 + + uint64_t& UNIX_CONTEXT::R0() { return (uint64_t&)MCREG_R0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Gp() { return (uint64_t&)MCREG_Gp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Tp() { return 
(uint64_t&)MCREG_Tp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T0() { return (uint64_t&)MCREG_T0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T1() { return (uint64_t&)MCREG_T1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T2() { return (uint64_t&)MCREG_T2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Fp() { return (uint64_t&)MCREG_Fp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S1() { return (uint64_t&)MCREG_S1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A0() { return (uint64_t&)MCREG_A0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A1() { return (uint64_t&)MCREG_A1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A2() { return (uint64_t&)MCREG_A2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A3() { return (uint64_t&)MCREG_A3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A4() { return (uint64_t&)MCREG_A4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A5() { return (uint64_t&)MCREG_A5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A6() { return (uint64_t&)MCREG_A6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::A7() { return (uint64_t&)MCREG_A7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S2() { return (uint64_t&)MCREG_S2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S3() { return (uint64_t&)MCREG_S3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S4() { return (uint64_t&)MCREG_S4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S5() { return (uint64_t&)MCREG_S5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S6() { return (uint64_t&)MCREG_S6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S7() { return (uint64_t&)MCREG_S7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S8() { return (uint64_t&)MCREG_S8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S9() { return (uint64_t&)MCREG_S9(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S10() { return (uint64_t&)MCREG_S10(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::S11() { return (uint64_t&)MCREG_S11(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T3() { return (uint64_t&)MCREG_T3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T4() { return (uint64_t&)MCREG_T4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T5() { return (uint64_t&)MCREG_T5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::T6() { return (uint64_t&)MCREG_T6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } + #else PORTABILITY_ASSERT("UNIX_CONTEXT"); #endif // TARGET_ARM diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index c12fae7d03355..792f223dc3204 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -217,6 +217,61 @@ struct UNIX_CONTEXT lambda((size_t*)&Ra()); } +#elif defined(TARGET_RISCV64) + + uint64_t& R0(); + uint64_t& Ra(); + uint64_t& Sp(); + uint64_t& Gp(); + uint64_t& Tp(); + uint64_t& T0(); + uint64_t& T1(); + uint64_t& T2(); + uint64_t& Fp(); + uint64_t& S1(); + uint64_t& A0(); + uint64_t& A1(); + uint64_t& A2(); + uint64_t& A3(); + uint64_t& A4(); + uint64_t& A5(); + uint64_t& A6(); + uint64_t& A7(); + uint64_t& S2(); + uint64_t& S3(); + uint64_t& S4(); + uint64_t& S5(); + uint64_t& S6(); + uint64_t& S7(); + uint64_t& S8(); + uint64_t& S9(); + uint64_t& S10(); + uint64_t& S11(); + uint64_t& T3(); + uint64_t& T4(); + uint64_t& T5(); + uint64_t& T6(); + uint64_t& Pc(); + + uintptr_t GetIp() { return (uintptr_t)Pc(); } + uintptr_t GetSp() { return (uintptr_t)Sp(); } + + template + void ForEachPossibleObjectRef(F lambda) + { + // It is expected that registers are stored in a contiguous manner + // Here are some 
asserts to check + ASSERT(&A0() + 1 == &A1()); + ASSERT(&A0() + 7 == &A7()); + + for (uint64_t* pReg = &Ra(); pReg <= &T6(); pReg++) + lambda((size_t*)pReg); + + // Ra and Fp can be used as scratch registers + lambda((size_t*)&Ra()); + lambda((size_t*)&Fp()); + } + #else PORTABILITY_ASSERT("UNIX_CONTEXT"); #endif // TARGET_ARM diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 9f982f630bcec..454aadf184b22 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -399,7 +399,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) ASSERT(((uintptr_t)pvAddress & 1) == 0); #endif -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) MethodInfo methodInfo; FindMethodInfo(pvAddress, &methodInfo); pMethodInfo = &methodInfo; @@ -722,6 +722,63 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre return savedFp && savedRa && establishedFp ? 0 : 1; +#elif defined(TARGET_RISCV64) + +// store pair with signed offset +// 0100 00xx xxxxxxxx xxxx xxxx xxxx xxxx +#define STW_PAIR_BITS 0x04000000 +#define STW_PAIR_MASK 0xFC000000 + +// add fp, sp, x +// addi fp, sp, x +// 0000 0001 100x xxxx xxxx xxxx 0000 0000 +#define ADD_FP_SP_BITS 0x01C00000 +#define ADD_FP_SP_MASK 0xFFFFE000 + +#define STW_PAIR_RS1_MASK 0xF80 +#define STW_PAIR_RS1_SP 0xF80 +#define STW_PAIR_RS1_FP 0xF00 +#define STW_PAIR_RS2_MASK 0xF00 +#define STW_PAIR_RS2_FP 0xF00 +#define STW_PAIR_RS2_RA 0xF40 + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + ASSERT(pNativeMethodInfo != NULL); + + uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; + bool savedFp = false; + bool savedRa = false; + bool establishedFp = false; + + for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFp && savedRa && establishedFp); pInstr++) + { + uint32_t instr = *pInstr; + + if (((instr & STW_PAIR_MASK) == STW_PAIR_BITS) && + ((instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_SP || (instr & STW_PAIR_RS1_MASK) == STW_PAIR_RS1_FP) && + ((instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP || (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA)) + { + // SP/FP-relative store of pair of registers + savedFp |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_FP; + savedRa |= (instr & STW_PAIR_RS2_MASK) == STW_PAIR_RS2_RA; + } + else if ((instr & ADD_FP_SP_MASK) == ADD_FP_SP_BITS) + { + establishedFp = true; + } + else + { + // JIT generates other patterns into the prolog that we currently don't + // recognize (saving unpaired register, stack pointer adjustments). We + // don't need to recognize these patterns unless a compact unwinding code + // is generated for them in ILC. + // https://github.com/dotnet/runtime/issues/76371 + return -1; + } + } + + return savedFp && savedRa && establishedFp ? 0 : 1; + #else return -1; @@ -1152,6 +1209,62 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho } } +#elif defined(TARGET_RISCV64) + +// Load with immediate +// LUI, LD, etc. 
+// 0000 0000 0000 0000 1111 1111 1111 1111 +#define LUI_BITS 0x00000037 +#define LUI_MASK 0x0000007F + +// Load with register offset +// LD with register offset +// 0000 0000 0000 0000 0111 0000 0000 0000 +#define LD_BITS 0x00000003 +#define LD_MASK 0x0000007F + +// Branches, Jumps, System calls +// BEQ, BNE, JAL, etc. +// 1100 0000 0000 0000 0000 0000 0000 0000 +#define BEGS_BITS 0x00000063 +#define BEGS_MASK 0x0000007F + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + ASSERT(pNativeMethodInfo != NULL); + + uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; + + // Since we stop on branches, the search is roughly limited by the containing basic block. + // We typically examine just 1-5 instructions and in rare cases up to 30. + // + // TODO: we can also limit the search by the longest possible epilogue length, but + // we must be sure the longest length considers all possibilities, + // which is somewhat nontrivial to derive/prove. + // It does not seem urgent, but it could be nice to have a constant upper bound. + for (uint32_t* pInstr = (uint32_t*)pvAddress - 1; pInstr > start; pInstr--) + { + uint32_t instr = *pInstr; + + // Check for branches, jumps, or system calls. + // If we see such instructions before registers are restored, we are not in an epilogue. + // Note: this includes RET, branches, jumps, and system calls. + if ((instr & BEGS_MASK) == BEGS_BITS) + { + // not in an epilogue + break; + } + + // Check for restoring registers (FP or RA) with `ld` + int rd = (instr >> 7) & 0x1F; // Extract the destination register + if (rd == 8 || rd == 1) // Check for FP (x8) or RA (x1) + { + if ((instr & LD_MASK) == LD_BITS) // Match `ld` instruction + { + return -1; + } + } + } + #endif return 0; @@ -1194,9 +1307,9 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Decode the GC info for the current method to determine its return type GcInfoDecoderFlags flags = DECODE_RETURN_KIND; -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS); -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 +#endif GcInfoDecoder decoder(GCInfoToken(p), flags); *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); @@ -1316,6 +1429,42 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pRA; return true; + +#elif defined(TARGET_RISCV64) + + if (decoder.HasTailCalls()) + { + // Do not hijack functions that have tail calls, since there are two problems: + // 1. When a function that tail calls another one is hijacked, the RA may be + // stored at a different location in the stack frame of the tail call target. + // So just by performing tail call, the hijacked location becomes invalid and + // unhijacking would corrupt stack by writing to that location. + // 2. There is a small window after the caller pops RA from the stack in its + // epilog and before the tail called function pushes RA in its prolog when + // the hijacked return address would not be on the stack and so we would + // not be able to unhijack. 
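The comment above motivates the hijack/unhijack protocol that the code below implements for RISC-V: once virtual unwind has located the slot holding the caller's return address, that slot is redirected to a hijack stub and later restored. A hedged illustration of the idea only, with invented types and names rather than the runtime's API:

// Illustrative sketch of return-address hijacking; not the runtime's types.
#include <cstdint>

struct ToyHijackState
{
    void** retAddrLocation;   // slot found by virtual unwind
    void*  originalReturn;    // value to put back when unhijacking
};

static void Hijack(ToyHijackState& s, void** slot, void* stub)
{
    s.retAddrLocation = slot;
    s.originalReturn = *slot; // remember the real return address
    *slot = stub;             // the thread now "returns" into the stub
}

static void Unhijack(ToyHijackState& s)
{
    *s.retAddrLocation = s.originalReturn;
    s.retAddrLocation = nullptr;
}

If a tail call moves or pops the slot in the meantime, Unhijack would write through a stale pointer, which is exactly the corruption the comment above describes.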
+ return false; + } + + PTR_uintptr_t pRA = pRegisterSet->pRA; + if (!VirtualUnwind(pMethodInfo, pRegisterSet)) + { + return false; + } + + if (pRegisterSet->pRA == pRA) + { + // This is the case when we are either: + // + // 1) In a leaf method that does not push RA on stack, OR + // 2) In the prolog/epilog of a non-leaf method that has not yet pushed RA on stack + // or has RA already popped off. + return false; + } + + *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pRA; + return true; + #else return false; #endif // defined(TARGET_AMD64) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 67701b45dd948..650ff860bfba9 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -36,6 +36,8 @@ using libunwind::Registers_arm64; using libunwind::CompactUnwinder_arm64; #elif defined(TARGET_LOONGARCH64) using libunwind::Registers_loongarch; +#elif defined(TARGET_RISCV64) +using libunwind::Registers_riscv; #elif defined(TARGET_X86) using libunwind::Registers_x86; #else @@ -1088,6 +1090,293 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) #endif // TARGET_LOONGARCH64 +#if defined(TARGET_RISCV64) + +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_RISCV; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; + + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + + double getFloatRegister(int num) const { abort(); } + void setFloatRegister(int num, double value) { abort(); } + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); + + uint64_t getSP() const { return SP; } + void setSP(uint64_t value, uint64_t location) { SP = value; } + uint64_t getIP() const { return IP; } + void setIP(uint64_t value, uint64_t location) { IP = value; } + uint64_t getFP() const { return *pFP; } + void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location; } +}; + +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_RISCV_X2) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_RISCV_X0 && num <= UNW_RISCV_X31) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + // Vector registers currently unsupported + return false; +} + +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { + switch (regNum) { + case UNW_RISCV_X0: + return *pT0; + case UNW_RISCV_X1: + return *pT1; + case UNW_RISCV_X2: + return *pT2; + case UNW_RISCV_X3: + return *pT3; + case UNW_RISCV_X4: + return *pT4; + case UNW_RISCV_X5: + return *pT5; + case UNW_RISCV_X6: + return *pT6; + // Add other general-purpose registers if needed + + case UNW_RISCV_F0: + return F[0]; + case UNW_RISCV_F1: + return F[1]; + case UNW_RISCV_F2: + return F[2]; + case UNW_RISCV_F3: + return F[3]; + case UNW_RISCV_F4: + return F[4]; + case UNW_RISCV_F5: + return F[5]; + case UNW_RISCV_F6: + return F[6]; + case UNW_RISCV_F7: + return F[7]; + case UNW_RISCV_F8: + return F[8]; + case UNW_RISCV_F9: + return F[9]; + case UNW_RISCV_F10: + return 
F[10]; + case UNW_RISCV_F11: + return F[11]; + case UNW_RISCV_F12: + return F[12]; + case UNW_RISCV_F13: + return F[13]; + case UNW_RISCV_F14: + return F[14]; + case UNW_RISCV_F15: + return F[15]; + case UNW_RISCV_F16: + return F[16]; + case UNW_RISCV_F17: + return F[17]; + case UNW_RISCV_F18: + return F[18]; + case UNW_RISCV_F19: + return F[19]; + case UNW_RISCV_F20: + return F[20]; + case UNW_RISCV_F21: + return F[21]; + case UNW_RISCV_F22: + return F[22]; + case UNW_RISCV_F23: + return F[23]; + case UNW_RISCV_F24: + return F[24]; + case UNW_RISCV_F25: + return F[25]; + case UNW_RISCV_F26: + return F[26]; + case UNW_RISCV_F27: + return F[27]; + case UNW_RISCV_F28: + return F[28]; + case UNW_RISCV_F29: + return F[29]; + case UNW_RISCV_F30: + return F[30]; + case UNW_RISCV_F31: + return F[31]; + // Add other floating-point registers if needed + + case UNW_RISCV_VLENB: + return 0; // VLENB not used in REGDISPLAY, adjust if needed + + default: + PORTABILITY_ASSERT("unsupported RISC-V register"); + } +} + +void Registers_REGDISPLAY::setRegister(int regNum, uint64_t value, uint64_t location) +{ + switch (regNum) { + case UNW_RISCV_X0: + *pT0 = value; + break; + case UNW_RISCV_X1: + *pT1 = value; + break; + case UNW_RISCV_X2: + *pT2 = value; + break; + case UNW_RISCV_X3: + *pT3 = value; + break; + case UNW_RISCV_X4: + *pT4 = value; + break; + case UNW_RISCV_X5: + *pT5 = value; + break; + case UNW_RISCV_X6: + *pT6 = value; + break; + + // Add other general-purpose registers if needed + + case UNW_RISCV_F0: + F[0] = value; + break; + case UNW_RISCV_F1: + F[1] = value; + break; + case UNW_RISCV_F2: + F[2] = value; + break; + case UNW_RISCV_F3: + F[3] = value; + break; + case UNW_RISCV_F4: + F[4] = value; + break; + case UNW_RISCV_F5: + F[5] = value; + break; + case UNW_RISCV_F6: + F[6] = value; + break; + case UNW_RISCV_F7: + F[7] = value; + break; + case UNW_RISCV_F8: + F[8] = value; + break; + case UNW_RISCV_F9: + F[9] = value; + break; + case UNW_RISCV_F10: + F[10] = value; + break; + case UNW_RISCV_F11: + F[11] = value; + break; + case UNW_RISCV_F12: + F[12] = value; + break; + case UNW_RISCV_F13: + F[13] = value; + break; + case UNW_RISCV_F14: + F[14] = value; + break; + case UNW_RISCV_F15: + F[15] = value; + break; + case UNW_RISCV_F16: + F[16] = value; + break; + case UNW_RISCV_F17: + F[17] = value; + break; + case UNW_RISCV_F18: + F[18] = value; + break; + case UNW_RISCV_F19: + F[19] = value; + break; + case UNW_RISCV_F20: + F[20] = value; + break; + case UNW_RISCV_F21: + F[21] = value; + break; + case UNW_RISCV_F22: + F[22] = value; + break; + case UNW_RISCV_F23: + F[23] = value; + break; + case UNW_RISCV_F24: + F[24] = value; + break; + case UNW_RISCV_F25: + F[25] = value; + break; + case UNW_RISCV_F26: + F[26] = value; + break; + case UNW_RISCV_F27: + F[27] = value; + break; + case UNW_RISCV_F28: + F[28] = value; + break; + case UNW_RISCV_F29: + F[29] = value; + break; + case UNW_RISCV_F30: + F[30] = value; + break; + case UNW_RISCV_F31: + F[31] = value; + break; + + // Add other floating-point registers if needed + + case UNW_RISCV_VLENB: + PORTABILITY_ASSERT("unsupported RISC-V VLENB register"); + break; + + default: + PORTABILITY_ASSERT("unsupported RISC-V register"); + } +} + +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + PORTABILITY_ASSERT("Vector registers currently unsupported on RISC-V"); +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + PORTABILITY_ASSERT("Vector registers currently unsupported on RISC-V"); +} 
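The Registers_REGDISPLAY shim above exists because libunwind's DWARF evaluator only needs a small validRegister/getRegister/setRegister surface over whatever register storage the host runtime uses. A hedged sketch of that adapter pattern, with invented names and without the runtime's actual REGDISPLAY layout:

// Illustrative adapter: expose a pointer-based register display through the
// accessor surface the DWARF evaluator expects. Names are not the runtime's.
#include <cstdint>
#include <cstdlib>

struct ToyRegDisplay
{
    uint64_t  IP;
    uint64_t  SP;
    uint64_t* pGpr[32];           // pointers to where each saved GPR lives
};

struct ToyRiscvShim : ToyRegDisplay
{
    bool validRegister(int num) const
    {
        return num >= 0 && num < 32 && pGpr[num] != nullptr;
    }

    uint64_t getRegister(int num) const
    {
        if (!validRegister(num))
            abort();              // this sketch only handles displayed GPRs
        return *pGpr[num];
    }

    void setRegister(int num, uint64_t value, uint64_t location)
    {
        if (!validRegister(num))
            abort();
        *pGpr[num] = value;       // write through to the displayed slot
        (void)location;           // a real shim may retarget the pointer to 'location' instead
    }
};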
+ +#endif // TARGET_RISCV64 + bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t format, unw_word_t unwind_info) { #if _LIBUNWIND_SUPPORT_DWARF_UNWIND @@ -1106,6 +1395,12 @@ bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t fo int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); return stepRet == UNW_STEP_SUCCESS; } +#elif defined(TARGET_RISCV64) + if ((format & UNWIND_RISCV64_MODE_MASK) != UNWIND_RISCV64_MODE_DWARF) { + CompactUnwinder_riscv64 compactInst; + int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); + return stepRet == UNW_STEP_SUCCESS; + } #elif defined(TARGET_AMD64) if ((format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { CompactUnwinder_x86_64 compactInst; @@ -1157,6 +1452,8 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio libunwind::UnwindCursor uc(_addressSpace); #elif defined(HOST_LOONGARCH64) libunwind::UnwindCursor uc(_addressSpace); +#elif defined(HOST_RISCV64) + libunwind::UnwindCursor uc(_addressSpace); #else #error "Unwinding is not implemented for this architecture yet." #endif @@ -1181,6 +1478,12 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio } else { dwarfOffsetHint = procInfo->format & UNWIND_LOONGARCH64_DWARF_SECTION_OFFSET; } +#elif defined(TARGET_RISCV64) + if ((procInfo->format & UNWIND_RISCV64_MODE_MASK) != UNWIND_RISCV64_MODE_DWARF) { + return true; + } else { + dwarfOffsetHint = procInfo->format & UNWIND_RISCV64_DWARF_SECTION_OFFSET; + } #elif defined(TARGET_AMD64) if ((procInfo->format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { return true; diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 68ba993209e42..4cf213cab49ab 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -44,4 +44,6 @@ #include "unixasmmacrosx86.inc" #elif defined(HOST_LOONGARCH64) #include "unixasmmacrosloongarch64.inc" +#elif defined(HOST_RISCV64) +#include "unixasmmacrosriscv64.inc" #endif diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc new file mode 100644 index 0000000000000..7f8ac78d34c1f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc @@ -0,0 +1,341 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmOffsets.inc" + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 (standard across most platforms) + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LABELED_RETURN_ADDRESS Name + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + lui \HelperReg, %hi(C_FUNC(\Name)) + addi \HelperReg, \HelperReg, %lo(C_FUNC(\Name)) +.endm + +.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg + lui \HelperReg, %hi(C_FUNC(\Name)) + ld \HelperReg, %lo(C_FUNC(\Name))(\HelperReg) +.endm + +.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg + lui \HelperReg, %hi(C_FUNC(\Name)) + lw \HelperReg, %lo(C_FUNC(\Name))(\HelperReg) +.endm + +.macro PROLOG_STACK_ALLOC Size + // If Size is larger than 2047, split it into multiple instructions + .if (\Size > 2047) || (\Size < -2048) + li t0, -\Size + add sp, sp, t0 + .else + addi sp, sp, -\Size + .endif +.endm + +.macro EPILOG_STACK_FREE Size + // If Size is larger than 2047 or smaller than -2048, split into multiple instructions + .if (\Size > 2047) || (\Size < -2048) + li t0, \Size // Load the large Size value into a temporary register + add sp, sp, t0 // Use the add instruction for full 64-bit addition + .cfi_adjust_cfa_offset -\Size + .else + addi sp, sp, \Size // Handle small immediate directly with addi + .cfi_adjust_cfa_offset -\Size + .endif +.endm + +.macro EPILOG_STACK_RESTORE + mv sp, fp + .cfi_restore fp +.endm + +.macro PROLOG_SAVE_REG reg, ofs + sd \reg, \ofs(sp) + .cfi_rel_offset \reg, \ofs +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs + sd \reg1, \ofs(sp) + sd \reg2, \ofs + 8(sp) + .cfi_rel_offset \reg1, \ofs + .cfi_rel_offset \reg2, \ofs + 8 + .ifc \reg1, fp + mv fp, sp + .cfi_def_cfa_register fp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ssize, __def_cfa_save=1 + addi sp, sp, -\ssize + //.cfi_adjust_cfa_offset \ssize + .cfi_def_cfa sp, \ssize + + sd \reg1, 0(sp) + sd \reg2, 8(sp) + + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 + .if (\__def_cfa_save == 1) + mv fp, sp + .cfi_def_cfa_register fp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ssize + addi sp, sp, -\ssize + //.cfi_adjust_cfa_offset \ssize + .cfi_def_cfa sp, \ssize + + sd \reg1, 0(sp) + sd \reg2, 8(sp) + + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 +.endm + +.macro EPILOG_RESTORE_REG reg, ofs + ld \reg, \ofs(sp) + .cfi_restore \reg +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ld \reg1, \ofs(sp) + ld \reg2, \ofs+8(sp) + .cfi_restore \reg1 + .cfi_restore \reg2 +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs + ld \reg1, (sp) + ld \reg2, 8(sp) + addi sp, sp, \ofs + .cfi_restore \reg1 + .cfi_restore \reg2 + .cfi_adjust_cfa_offset -\ofs +.endm + +.macro EPILOG_RETURN + jalr x0, ra +.endm + +.macro EMIT_BREAKPOINT + ebreak +.endm + +.macro EPILOG_BRANCH_REG reg + jalr \reg +.endm + +// Loads the address of a thread-local variable into the target register, +// 
which cannot be x0. Preserves all other registers. +.macro INLINE_GET_TLS_VAR target, var + .ifc \target, x0 + .error "target cannot be x0" + .endif + + sd x0, -16(sp) + sd ra, -8(sp) + + // RISC-V does not have a direct equivalent to Apple's or GNU's TLS + // handling, so we'll use an indirect approach and inline assembly + // if needed. + lui t0, %hi(\var) + addi t0, t0, %lo(\var) + ld \target, 0(t0) + + // This sequence is a placeholder; actual TLS handling may require + // platform-specific instructions or further customization. + + ld ra, -8(sp) + ld x0, -16(sp) +.endm +// Inlined version of RhpGetThread. Target cannot be x0. +.macro INLINE_GETTHREAD target + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +.endm + +// Do not use these ETLS macros in functions that already create a stack frame. +// Creating two stack frames in one function can confuse the unwinder/debugger + +.macro GETTHREAD_ETLS_1 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 // Push down stack pointer and store FP and RA + sd x0, 16(sp) + + call C_FUNC(RhpGetThread) + mv x1, x0 + + ld x0, 16(sp) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 +.endm + +.macro GETTHREAD_ETLS_2 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 // Push down stack pointer and store FP and RA + sd x0, 16(sp) + sd x1, 24(sp) + + call C_FUNC(RhpGetThread) + mv x2, x0 + + ld x0, 16(sp) + ld x1, 24(sp) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 +.endm + +.macro GETTHREAD_ETLS_3 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -48 // Push down stack pointer and store FP and RA + sd x0, 16(sp) + sd x1, 24(sp) + sd x2, 32(sp) + + call C_FUNC(RhpGetThread) + mv x3, x0 + + ld x0, 16(sp) + ld x1, 24(sp) + ld x2, 32(sp) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 48 +.endm + +.macro GETTHUNKDATA_ETLS_9 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -96 // Push down stack pointer and store FP and RA + sd x0, 16(sp) + sd x1, 24(sp) + sd x2, 32(sp) + sd x3, 40(sp) + sd x4, 48(sp) + sd x5, 56(sp) + sd x6, 64(sp) + sd x7, 72(sp) + sd x8, 80(sp) + sd x9, 88(sp) + + call C_FUNC(RhpGetThunkData) + mv x10, x0 + + ld x0, 16(sp) + ld x1, 24(sp) + ld x2, 32(sp) + ld x3, 40(sp) + ld x4, 48(sp) + ld x5, 56(sp) + ld x6, 64(sp) + ld x7, 72(sp) + ld x8, 80(sp) + ld x9, 88(sp) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 96 +.endm + +.macro InterlockedOperationBarrier + fence rw, rw +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + ld \trashReg1, OFFSETOF__Thread__m_pvHijackedReturnAddress(\threadReg) + beqz \trashReg1, 0f + + ld \trashReg2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(\threadReg) + sd \trashReg1, 0(\trashReg2) + sd zero, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation(\threadReg) + sd zero, OFFSETOF__Thread__m_pvHijackedReturnAddress(\threadReg) +0: +.endm + +// Note: these must match the defs in PInvokeTransitionFrameFlags +#define PTFF_SAVE_SP 0x00000800 +#define PTFF_SAVE_A0 0x00004000 +#define PTFF_SAVE_A1 0x00008000 +#define PTFF_SAVE_ALL_PRESERVED 0x000007FF // NOTE: S1-S11 + +#define DEFAULT_FRAME_SAVE_FLAGS PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -0x80 // Push down stack pointer and store FP and RA + + // 0x10 bytes reserved for Thread* and flags + + // Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, 0x20 + PROLOG_SAVE_REG_PAIR x21, x22, 0x30 + PROLOG_SAVE_REG_PAIR x23, x24, 0x40 + PROLOG_SAVE_REG_PAIR x25, x26, 0x50 + PROLOG_SAVE_REG_PAIR x27, x28, 0x60 + + // Save the value of SP before stack allocation 
to the last slot in the frame (slot #15) + add \trashReg, sp, 0x80 + sd \trashReg, 0x70(sp) + + // Record the bitmask of saved registers in the frame (slot #3) + li \trashReg, DEFAULT_FRAME_SAVE_FLAGS + sd \trashReg, 0x18(sp) + + mv \trashReg, sp +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + EPILOG_RESTORE_REG_PAIR x19, x20, 0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, 0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, 0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, 0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, 0x60 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 0x80 +.endm + +// Bit position for the flags above, to be used with tbz / tbnz instructions +#define PTFF_THREAD_ABORT_BIT 35 + +// +// CONSTANTS -- INTEGER +// +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 +#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18 + +// Bit position for the flags above, to be used with tbz / tbnz instructions +#define TrapThreadsFlags_AbortInProgress_Bit 0 +#define TrapThreadsFlags_TrapThreads_Bit 1 + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs index c7a96e12ddf17..ab3583c38d145 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs @@ -44,6 +44,8 @@ public static TypeSystemContext Create() TargetArchitecture.Wasm32, #elif TARGET_LOONGARCH64 TargetArchitecture.LoongArch64, +#elif TARGET_RISCV64 + TargetArchitecture.RiscV64, #else #error Unknown architecture #endif diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs index 07d9d0ccb4a06..68e299378af59 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Relocation.cs @@ -20,6 +20,7 @@ public enum RelocType IMAGE_REL_BASED_LOONGARCH64_PC = 0x16, // LoongArch64: pcalau12i+imm12 IMAGE_REL_BASED_LOONGARCH64_JIR = 0x17, // LoongArch64: pcaddu18i+jirl IMAGE_REL_BASED_RISCV64_PC = 0x18, // RiscV64: auipc + IMAGE_REL_BASED_RISCV64_JALR = 0x19, // RiscV64: jalr (indirect jump) IMAGE_REL_BASED_RELPTR32 = 0x7C, // 32-bit relative address from byte starting reloc // This is a special NGEN-specific relocation type // for relative pointer (used to make NGen relocation diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs index a382331040c4d..a285069cfada2 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunGenericHelperNode.cs @@ -39,6 +39,17 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref RiscV64Emitter enco // Load the generic dictionary cell 
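Before the EmitLD call that follows, it may help to see the shape of the lookup the emitter produces: load the dictionary slot, and when invalid entries are possible, route a null result to a lazy-resolution helper. This is a hedged plain-C++ sketch of that load-and-validate sequence; the function and parameter names are invented and the exact emitted instructions differ.

// Illustrative sketch of a generic dictionary slot lookup; names are made up.
#include <cstddef>

using Slot = void*;

static Slot LookupDictionarySlot(Slot* dictionary, size_t slotIndex,
                                 Slot (*resolveBadSlot)(Slot* dictionary, size_t slotIndex),
                                 bool mayHaveInvalidEntries)
{
    Slot value = dictionary[slotIndex];                   // ld result, slot*8(context)
    if (mayHaveInvalidEntries && value == nullptr)
        return resolveBadSlot(dictionary, slotIndex);     // jump to the bad-slot helper
    return value;
}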
encoder.EmitLD(result, context, dictionarySlot * factory.Target.PointerSize); + + // If there's any invalid entries, we need to test for them + // + // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries + // flag can change over the course of compilation and the bad slot helper dependency + // should be reported by someone else - the system should not rely on it coming from here. + if (!relocsOnly && _hasInvalidEntries) + { + encoder.EmitXORI(encoder.TargetRegister.IntraProcedureCallScratch1, result, 0); + encoder.EmitJALR(Register.X0, encoder.TargetRegister.IntraProcedureCallScratch1, 0); + } } protected sealed override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder, bool relocsOnly) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs index 620878463e3e3..c6ad35d1c76ad 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_RiscV64/RiscV64ReadyToRunHelperNode.cs @@ -140,12 +140,12 @@ protected override void EmitCode(NodeFactory factory, ref RiscV64Emitter encoder case ReadyToRunHelperId.ResolveVirtualFunction: { - // Not tested - encoder.EmitBreak(); - MethodDesc targetMethod = (MethodDesc)Target; if (targetMethod.OwningType.IsInterface) { + // Not tested + encoder.EmitBreak(); + encoder.EmitMOV(encoder.TargetRegister.Arg1, factory.InterfaceDispatchCell(targetMethod)); encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod")); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 1d78df875c125..20b2253175bc0 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -66,6 +66,7 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, { TargetArchitecture.ARM64 => "RhpAssignRefArm64", TargetArchitecture.LoongArch64 => "RhpAssignRefLoongArch64", + TargetArchitecture.RiscV64 => "RhpAssignRefRiscV64", _ => "RhpAssignRef" }; break; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index 643f14056bd4d..99a1d8b53ac94 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -86,6 +86,12 @@ public DwarfBuilder( _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; break; + case TargetArchitecture.RiscV64: + _targetPointerSize = 8; + _frameRegister = 8; // FP + _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; + break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index e5303e64f1aa7..b183131115725 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -93,6 +93,19 @@ public 
DwarfCie(TargetArchitecture targetArchitecture) InitialCFAOffset = 0; break; + case TargetArchitecture.RiscV64: + CodeAlignFactor = 1; + DataAlignFactor = -8; + ReturnAddressRegister = 1; // RA + Instructions = new byte[] + { + DW_CFA_def_cfa, + 2, // SP + 0, // Offset from SP + }; + InitialCFAOffset = 0; + break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs index 89c2188774416..28a17af35a35f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs @@ -165,6 +165,10 @@ public static int DwarfRegNum(TargetArchitecture architecture, int regNum) // Normal registers are directly mapped return regNum; + case TargetArchitecture.RiscV64: + // Normal registers are directly mapped + return regNum; + default: throw new NotSupportedException(); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs index 8288f7fe8bd35..cc37123883c7a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs @@ -25,6 +25,7 @@ internal static class ElfNative public const ushort EM_ARM = 40; public const ushort EM_X86_64 = 62; public const ushort EM_AARCH64 = 183; + public const ushort EM_RISCV = 243; public const ushort EM_LOONGARCH = 258; // Section header type @@ -553,5 +554,94 @@ internal static class ElfNative public const uint R_LARCH_TLS_LD_PCREL20_S2 = 124; public const uint R_LARCH_TLS_GD_PCREL20_S2 = 125; public const uint R_LARCH_TLS_DESC_PCREL20_S2 = 126; + + // Relocations (riscv) + public const uint R_RISCV_NONE = 0; + public const uint R_RISCV_32 = 1; + public const uint R_RISCV_64 = 2; + public const uint R_RISCV_RELATIVE = 3; + public const uint R_RISCV_COPY = 4; + public const uint R_RISCV_JUMP_SLOT = 5; + public const uint R_RISCV_TLS_DTPMOD32 = 6; + public const uint R_RISCV_TLS_DTPMOD64 = 7; + public const uint R_RISCV_TLS_DTPREL32 = 8; + public const uint R_RISCV_TLS_DTPREL64 = 9; + public const uint R_RISCV_TLS_TPREL32 = 10; + public const uint R_RISCV_TLS_TPREL64 = 11; + public const uint R_RISCV_IRELATIVE = 12; + public const uint R_RISCV_ADD8 = 13; + public const uint R_RISCV_ADD16 = 14; + public const uint R_RISCV_ADD32 = 15; + public const uint R_RISCV_ADD64 = 16; + public const uint R_RISCV_SUB8 = 17; + public const uint R_RISCV_SUB16 = 18; + public const uint R_RISCV_SUB32 = 19; + public const uint R_RISCV_SUB64 = 20; + public const uint R_RISCV_GOT_HI20 = 21; + public const uint R_RISCV_GOT_LO12 = 22; + public const uint R_RISCV_GOT64_HI20 = 23; + public const uint R_RISCV_GOT64_LO12 = 24; + public const uint R_RISCV_TLS_GD_HI20 = 25; + public const uint R_RISCV_TLS_GD_LO12 = 26; + public const uint R_RISCV_TLS_GD_ADD = 27; + public const uint R_RISCV_TLS_GD_CALL = 28; + public const uint R_RISCV_TLS_LD_HI20 = 29; + public const uint R_RISCV_TLS_LD_LO12 = 30; + public const uint R_RISCV_TLS_LD_ADD = 31; + public const uint R_RISCV_TLS_LD_CALL = 32; + public const uint R_RISCV_TLS_IE_HI20 = 33; + public const uint R_RISCV_TLS_IE_LO12 = 34; + public const uint R_RISCV_TLS_IE_ADD = 
35; + public const uint R_RISCV_TLS_IE_CALL = 36; + public const uint R_RISCV_TLS_TPREL_HI20 = 37; + public const uint R_RISCV_TLS_TPREL_LO12 = 38; + public const uint R_RISCV_TLS_TPREL_ADD = 39; + public const uint R_RISCV_TLS_TPREL_CALL = 40; + public const uint R_RISCV_BRANCH = 41; + public const uint R_RISCV_JAL = 42; + public const uint R_RISCV_CALL = 43; + public const uint R_RISCV_CALL_PLT = 44; + public const uint R_RISCV_GOT = 45; + public const uint R_RISCV_PLT = 46; + public const uint R_RISCV_PLT32 = 47; + public const uint R_RISCV_PLT64 = 48; + public const uint R_RISCV_COPY64 = 49; + public const uint R_RISCV_RELATIVE64 = 50; + public const uint R_RISCV_64_ADD = 54; + public const uint R_RISCV_64_SUB = 55; + public const uint R_RISCV_64_HI20 = 56; + public const uint R_RISCV_64_LO12 = 57; + public const uint R_RISCV_RELAX = 58; + public const uint R_RISCV_ALIGN = 59; + public const uint R_RISCV_ADD_32 = 60; + public const uint R_RISCV_SUB_32 = 61; + public const uint R_RISCV_ADD_64 = 62; + public const uint R_RISCV_SUB_64 = 63; + public const uint R_RISCV_CALL32 = 64; + public const uint R_RISCV_CALL64 = 65; + public const uint R_RISCV_JUMP32 = 66; + public const uint R_RISCV_JUMP64 = 67; + public const uint R_RISCV_GOT32 = 68; + public const uint R_RISCV_GOT64 = 69; + public const uint R_RISCV_PCREL_HI20 = 70; + public const uint R_RISCV_PCREL_LO12 = 71; + public const uint R_RISCV_CALL_HI20 = 72; + public const uint R_RISCV_CALL_LO12 = 73; + public const uint R_RISCV_JUMP_HI20 = 74; + public const uint R_RISCV_JUMP_LO12 = 75; + public const uint R_RISCV_PCREL_LO12_I = 76; + public const uint R_RISCV_PCREL_LO12_S = 77; + public const uint R_RISCV_PCREL_LO12_F = 78; + public const uint R_RISCV_PCREL_LO12_J = 79; + public const uint R_RISCV_JUMP_PCREL_LO12 = 80; + public const uint R_RISCV_JUMP_PCREL_HI20 = 81; + public const uint R_RISCV_CALL_PCREL_LO12 = 82; + public const uint R_RISCV_CALL_PCREL_HI20 = 83; + public const uint R_RISCV_TLS_DTPMOD64_HI20 = 84; + public const uint R_RISCV_TLS_DTPMOD64_LO12 = 85; + public const uint R_RISCV_TLS_DTPREL64_HI20 = 86; + public const uint R_RISCV_TLS_DTPREL64_LO12 = 87; + public const uint R_RISCV_TLS_TPREL64_HI20 = 88; + public const uint R_RISCV_TLS_TPREL64_LO12 = 89; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 9f3d877b602b6..688e96b68bb7a 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -60,6 +60,7 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options) TargetArchitecture.ARM => EM_ARM, TargetArchitecture.ARM64 => EM_AARCH64, TargetArchitecture.LoongArch64 => EM_LOONGARCH, + TargetArchitecture.RiscV64 => EM_RISCV, _ => throw new NotSupportedException("Unsupported architecture") }; _useInlineRelocationAddends = _machine is EM_386 or EM_ARM; @@ -362,6 +363,9 @@ private protected override void EmitRelocations(int sectionIndex, List relocationList) + { + if (relocationList.Count > 0) + { + Span relocationEntry = stackalloc byte[24]; + var relocationStream = new MemoryStream(24 * relocationList.Count); + _sections[sectionIndex].RelocationStream = relocationStream; + + foreach (SymbolicRelocation symbolicRelocation in relocationList) + { + uint symbolIndex = _symbolNameToIndex[symbolicRelocation.SymbolName]; + uint type = 
symbolicRelocation.Type switch + { + IMAGE_REL_BASED_DIR64 => R_RISCV_64, + IMAGE_REL_BASED_HIGHLOW => R_RISCV_32, + IMAGE_REL_BASED_RELPTR32 => R_RISCV_RELATIVE, + IMAGE_REL_BASED_RISCV64_PC => R_RISCV_PCREL_HI20, + IMAGE_REL_BASED_RISCV64_JALR => R_RISCV_CALL32, + _ => throw new NotSupportedException("Unknown relocation type: " + symbolicRelocation.Type) + }; + + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset); + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type); + BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); + relocationStream.Write(relocationEntry); + + if (symbolicRelocation.Type is IMAGE_REL_BASED_RISCV64_PC) + { + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset + 4); + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type + 1); + BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); + relocationStream.Write(relocationEntry); + } + } + } + } + private protected override void EmitSectionsAndLayout() { if (_machine == EM_ARM) @@ -805,6 +846,7 @@ private void EmitObjectFile(FileStream outputFileStream) { EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification EM_LOONGARCH => 0x43u, // For LoongArch ELF psABI specify the ABI version (1) and modifiers (64-bit GPRs, 64-bit FPRs) + EM_RISCV => 0x08u, // For RISC-V, specify the ABI or architecture-specific version if applicable _ => 0u }, }; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 34c5a1a3b16cc..fab907801e723 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -1991,12 +1991,12 @@ private int SizeOfPInvokeTransitionFrame // m_RIP (1) // m_FramePointer (1) // m_pThread - // m_Flags + align (no align for ARM64/LoongArch64 that has 64 bit m_Flags) + // m_Flags + align (no align for ARM64/LoongArch64/RiscV64 that has 64 bit m_Flags) // m_PreservedRegs - RSP / R9 (2) // No need to save other preserved regs because of the JIT ensures that there are // no live GC references in callee saved registers around the PInvoke callsite. 
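The RISC-V relocation path above serializes standard ELF64 RELA records: 24 bytes per entry, with r_info packing the symbol index in the high 32 bits and the relocation type in the low 32 bits. A hedged sketch of that layout; the Emit helper name is invented and a little-endian host is assumed.

// Illustrative ELF64 RELA record packing; struct fields follow the ELF spec.
#include <cstdint>
#include <cstring>

struct Elf64_Rela
{
    uint64_t r_offset;   // where in the section to apply the fixup
    uint64_t r_info;     // (symbol index << 32) | relocation type
    int64_t  r_addend;   // constant added to the symbol value
};

static void EmitRela(uint8_t out[24], uint64_t offset, uint32_t symbolIndex,
                     uint32_t type, int64_t addend)
{
    Elf64_Rela rela;
    rela.r_offset = offset;
    rela.r_info   = (static_cast<uint64_t>(symbolIndex) << 32) | type;
    rela.r_addend = addend;
    memcpy(out, &rela, sizeof(rela));   // little-endian host assumed in this sketch
}

For IMAGE_REL_BASED_RISCV64_PC the writer above appends a second, companion record at offset + 4 with the next relocation type, pairing the PCREL_HI20 fixup with its low-part counterpart.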
// - // (1) On ARM32/ARM64/LoongArch64 the order of m_RIP and m_FramePointer is reverse + // (1) On ARM32/ARM64/LoongArch64/RiscV64 the order of m_RIP and m_FramePointer is reverse // (2) R9 is saved for ARM32 because it needs to be preserved for methods with stackalloc int size = 5 * this.PointerSize; diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 40c5c686c6583..e97c93b5a917e 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -2017,7 +2017,7 @@ OBJECTREF* GcInfoDecoder::GetRegisterSlot( _ASSERTE((regNum == 1) || (regNum >= 5 && regNum <= 31)); #ifdef FEATURE_NATIVEAOT - PTR_uintptr_t* ppReg = &pRD->pR0; + PTR_uintptr_t* ppReg = &pRD->pRA; return (OBJECTREF*)*(ppReg + regNum); #else diff --git a/src/native/external/llvm-libunwind/include/__libunwind_config.h b/src/native/external/llvm-libunwind/include/__libunwind_config.h index d521890f17f86..39d48af424f5f 100644 --- a/src/native/external/llvm-libunwind/include/__libunwind_config.h +++ b/src/native/external/llvm-libunwind/include/__libunwind_config.h @@ -151,7 +151,7 @@ # else # define RISCV_FLEN 0 # endif -# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) +# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) + 32 # if __riscv_xlen == 32 # define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 7) # elif __riscv_xlen == 64 diff --git a/src/native/external/llvm-libunwind/src/Registers.hpp b/src/native/external/llvm-libunwind/src/Registers.hpp index b76f24ea67da5..600c56bb92ff3 100644 --- a/src/native/external/llvm-libunwind/src/Registers.hpp +++ b/src/native/external/llvm-libunwind/src/Registers.hpp @@ -4240,13 +4240,14 @@ class _LIBUNWIND_HIDDEN Registers_riscv { bool validRegister(int num) const; reg_t getRegister(int num) const; - void setRegister(int num, reg_t value); + void setRegister(int num, reg_t value, uint64_t location); bool validFloatRegister(int num) const; fp_t getFloatRegister(int num) const; void setFloatRegister(int num, fp_t value); bool validVectorRegister(int num) const; v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); + uint64_t getRegisterLocation(int num) const; static const char *getRegisterName(int num); void jumpto(); static constexpr int lastDwarfRegNum() { @@ -4255,13 +4256,14 @@ class _LIBUNWIND_HIDDEN Registers_riscv { static int getArch() { return REGISTERS_RISCV; } reg_t getSP() const { return _registers[2]; } - void setSP(reg_t value) { _registers[2] = value; } + void setSP(reg_t value, uint64_t location) { _registers[2] = value; } reg_t getIP() const { return _registers[0]; } - void setIP(reg_t value) { _registers[0] = value; } + void setIP(reg_t value, uint64_t location) { _registers[0] = value; } private: // _registers[0] holds the pc reg_t _registers[32]; + reg_t _registerLocations[32]; # if defined(__riscv_flen) fp_t _floats[32]; # endif @@ -4271,6 +4273,7 @@ inline Registers_riscv::Registers_riscv(const void *registers) { static_assert((check_fit::does_fit), "riscv registers do not fit into unw_context_t"); memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); # if __riscv_xlen == 32 static_assert(sizeof(_registers) == 0x80, "expected float registers to be at offset 128"); @@ -4290,6 +4293,7 @@ inline Registers_riscv::Registers_riscv(const void *registers) { inline Registers_riscv::Registers_riscv() { memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, 
sizeof(_registerLocations)); # if defined(__riscv_flen) memset(&_floats, 0, sizeof(_floats)); # endif @@ -4326,20 +4330,41 @@ inline reg_t Registers_riscv::getRegister(int regNum) const { _LIBUNWIND_ABORT("unsupported riscv register"); } -inline void Registers_riscv::setRegister(int regNum, reg_t value) { - if (regNum == UNW_REG_IP) +inline void Registers_riscv::setRegister(int regNum, reg_t value, uint64_t location) { + if (regNum == UNW_REG_IP) { _registers[0] = value; + _registerLocations[0] = value; + } else if (regNum == UNW_REG_SP) _registers[2] = value; else if (regNum == UNW_RISCV_X0) /* x0 is hardwired to zero */ return; - else if ((regNum > 0) && (regNum < 32)) + else if ((regNum > 0) && (regNum < 32)) { _registers[regNum] = value; + _registerLocations[regNum - UNW_RISCV_X0] = location; + } else _LIBUNWIND_ABORT("unsupported riscv register"); } +inline uint64_t Registers_riscv::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_IP) + return _registerLocations[0]; + if (regNum == UNW_REG_SP) + return _registerLocations[2]; + if (regNum == UNW_RISCV_X0) + return 0; + if ((regNum > 0) && (regNum < 32)) + return _registerLocations[regNum]; + if (regNum == UNW_RISCV_VLENB) { + reg_t vlenb; + __asm__("csrr %0, 0xC22" : "=r"(vlenb)); + return vlenb; + } + _LIBUNWIND_ABORT("unsupported riscv register"); +} + inline const char *Registers_riscv::getRegisterName(int regNum) { switch (regNum) { case UNW_REG_IP: diff --git a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp index 606ba0b0a8d31..0920b2e5e0a79 100644 --- a/src/native/external/llvm-libunwind/src/UnwindCursor.hpp +++ b/src/native/external/llvm-libunwind/src/UnwindCursor.hpp @@ -2819,10 +2819,10 @@ int UnwindCursor::stepThroughSigReturn(Registers_riscv &) { const pint_t kOffsetSpToSigcontext = 128 + 8 + 8 + 24 + 8 + 128; const pint_t sigctx = _registers.getSP() + kOffsetSpToSigcontext; - _registers.setIP(_addressSpace.get64(sigctx)); + _registers.setIP(_addressSpace.get64(sigctx), 0); for (int i = UNW_RISCV_X1; i <= UNW_RISCV_X31; ++i) { uint64_t value = _addressSpace.get64(sigctx + static_cast(i * 8)); - _registers.setRegister(i, value); + _registers.setRegister(i, value, 0); } _isSignalFrame = true; return UNW_STEP_SUCCESS;
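The Registers_riscv changes above add a per-register location array next to the values, so that after a step the unwinder can report where each restored register came from (getRegisterLocation); the NativeAOT shim relies on that to retarget its REGDISPLAY pointers. A hedged, minimal sketch of that bookkeeping with invented names:

// Illustrative value+location bookkeeping; not libunwind's actual class.
#include <cstdint>

struct ToyRegisterFile
{
    uint64_t value[32];
    uint64_t location[32];   // 0 when the register was not restored from memory

    void set(int num, uint64_t v, uint64_t loc)
    {
        value[num] = v;      // the restored contents
        location[num] = loc; // the stack address the contents were loaded from
    }

    uint64_t getLocation(int num) const { return location[num]; }
};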