diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt
index cc85821174ae46..7ea9d0333b0353 100644
--- a/src/coreclr/CMakeLists.txt
+++ b/src/coreclr/CMakeLists.txt
@@ -15,6 +15,7 @@ endif (MSVC)

 # Set commonly used directory names
 set(CLR_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(RUNTIME_DIR ${CMAKE_CURRENT_SOURCE_DIR}/runtime)
 set(VM_DIR ${CMAKE_CURRENT_SOURCE_DIR}/vm)
 set(GENERATED_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/inc)
 set(GENERATED_EVENTING_DIR ${CMAKE_CURRENT_BINARY_DIR}/Eventing)
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake
index 0327aefce005d6..5ff5149ad80730 100644
--- a/src/coreclr/clrfeatures.cmake
+++ b/src/coreclr/clrfeatures.cmake
@@ -44,11 +44,11 @@ if(NOT DEFINED FEATURE_INTERPRETER)
     set(FEATURE_PORTABLE_ENTRYPOINTS 1)
     set(FEATURE_PORTABLE_HELPERS 1)
   else()
-    if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
+    if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
       set(FEATURE_INTERPRETER $<IF:$<OR:$<CONFIG:DEBUG>,$<CONFIG:CHECKED>>,1,0>)
-    else(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
+    else(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
       set(FEATURE_INTERPRETER 0)
-    endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
+    endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
   endif()
 endif(NOT DEFINED FEATURE_INTERPRETER)
diff --git a/src/coreclr/debug/CMakeLists.txt b/src/coreclr/debug/CMakeLists.txt
index 0d52fa77527ea3..5a0a420346882f 100644
--- a/src/coreclr/debug/CMakeLists.txt
+++ b/src/coreclr/debug/CMakeLists.txt
@@ -2,6 +2,8 @@
 add_compile_definitions($<${FEATURE_CORECLR_CACHED_INTERFACE_DISPATCH}:FEATURE_CACHED_INTERFACE_DISPATCH>)
 add_compile_definitions($<${FEATURE_CORECLR_VIRTUAL_STUB_DISPATCH}:FEATURE_VIRTUAL_STUB_DISPATCH>)

+include_directories(${RUNTIME_DIR})
+
 add_subdirectory(daccess)
 add_subdirectory(ee)
 add_subdirectory(di)
diff --git a/src/coreclr/dlls/CMakeLists.txt b/src/coreclr/dlls/CMakeLists.txt
index 0a2ab14d8db82f..05bfa49756c220 100644
--- a/src/coreclr/dlls/CMakeLists.txt
+++ b/src/coreclr/dlls/CMakeLists.txt
@@ -1,3 +1,5 @@
+include_directories(${RUNTIME_DIR})
+
 if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE)
   add_subdirectory(clretwrc)
 endif(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE)
diff --git a/src/coreclr/dlls/mscordac/CMakeLists.txt b/src/coreclr/dlls/mscordac/CMakeLists.txt
index dc3b79d6933165..0031a9b323c2d9 100644
--- a/src/coreclr/dlls/mscordac/CMakeLists.txt
+++ b/src/coreclr/dlls/mscordac/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_definitions(-DFEATURE_NO_HOST)

-set(CLR_DAC_SOURCES
-)
+set(CLR_DAC_SOURCES)

 add_definitions(-DFX_VER_INTERNALNAME_STR=mscordaccore.dll)
diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt
index 933ae377ab566a..c3dd534aa2f76b 100644
--- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt
+++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt
@@ -1,5 +1,4 @@
 set(GC_DIR ../../gc)
-set(RUNTIME_DIR ../../runtime)

 set(COMMON_RUNTIME_SOURCES
     allocheap.cpp
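The next two hunks relocate the RISC-V inline TLS helper so the PAL and NativeAOT runtimes can share one copy from ${RUNTIME_DIR} (the new file appears further down as src/coreclr/runtime/riscv64/InlineTls.inc). For orientation, here is a hedged C++ sketch, with assumed names and not part of the patch, of the operation the INLINE_GET_TLS_VAR macro performs by hand: taking the address of a thread_local under the global-dynamic TLS model, which the compiler lowers to the same __tls_get_addr call.

```
// Hedged sketch (assumed names, not runtime code): what INLINE_GET_TLS_VAR
// computes. Under the global-dynamic TLS model, taking &t_var lowers to a
// __tls_get_addr call, the same sequence the assembly macro emits by hand.
#include <cstdio>

thread_local int t_var = 42;   // hypothetical thread-local variable

int* addr_of_tls_var()
{
    // On riscv64-linux with -fPIC this typically compiles to:
    //   la.tls.gd a0, t_var
    //   call      __tls_get_addr
    return &t_var;
}

int main()
{
    std::printf("%p -> %d\n", static_cast<void*>(addr_of_tls_var()), *addr_of_tls_var());
    return 0;
}
```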
diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc
index 60b4ce6aad87b9..b1c56060127207 100644
--- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc
+++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosriscv64.inc
@@ -154,75 +154,7 @@ C_FUNC(\Name):
     jalr \reg
 .endm

-// Loads the address of a thread-local variable into the target register.
-// The target register cannot be a0.
-.macro INLINE_GET_TLS_VAR target, var, ofs = 0
-    .ifc \target, a0
-    .error "target cannot be a0"
-    .endif
-
-    addi sp, sp, -72
-    sd ra, 64(sp)
-    sd t1, 56(sp)
-    sd a1, 48(sp)
-    sd a2, 40(sp)
-    sd a3, 32(sp)
-    sd a4, 24(sp)
-    sd a5, 16(sp)
-    sd a6, 8(sp)
-    sd a7, 0(sp)
-
-    // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic
-    la.tls.gd a0, \var
-    call C_FUNC(__tls_get_addr)
-
-    ld ra, 64(sp)
-    ld t1, 56(sp)
-    ld a1, 48(sp)
-    ld a2, 40(sp)
-    ld a3, 32(sp)
-    ld a4, 24(sp)
-    ld a5, 16(sp)
-    ld a6, 8(sp)
-    ld a7, 0(sp)
-    addi sp, sp, 72
-
-    add \target, a0, \ofs
-
-    /*
-    In the future we should switch to TLS descriptors. Its support was added in 2024 in glibc, musl, llvm, gcc and binutils,
-    which is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works
-
-    When the support for TLS descriptors is available in NativeAOT baseline, actions to perform:
-    * Apply this patch:
-    ```
-    --- a/src/coreclr/nativeaot/CMakeLists.txt
-    +++ b/src/coreclr/nativeaot/CMakeLists.txt
-    @@ -30,6 +30,8 @@ endif (CLR_CMAKE_HOST_UNIX)
-
-     if(CLR_CMAKE_TARGET_ANDROID)
-       add_definitions(-DFEATURE_EMULATED_TLS)
-    +elseif(CLR_CMAKE_TARGET_ARCH_RISCV64)
-    +  add_definitions(-mtls-dialect=desc)
-     endif()
-
-     add_subdirectory(Bootstrap)
-    ```
-    * Remove global dynamic code including prolog and epilog.
-    * Uncomment the following code and remove these comments.
-
-    // TLS descriptor, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#tls-descriptors
-    auipc a0, %tlsdesc_hi(\var)
-    lw t0, %tlsdesc_load_lo(\var)(a0)
-    addi a0, a0, %tlsdesc_add_lo(\var)
-    jalr t0, 0(t0), %tlsdesc_call(\var)
-    add \target, tp, a0
-    .ifnc \ofs, 0
-    add \target, \target, \ofs
-    .endif
-
-    */
-.endm
+#include <riscv64/InlineTls.inc>

 // Inlined version of RhpGetThread. Target cannot be x0.
 .macro INLINE_GETTHREAD target
diff --git a/src/coreclr/pal/CMakeLists.txt b/src/coreclr/pal/CMakeLists.txt
index 580bccb7cd6301..641995368d1879 100644
--- a/src/coreclr/pal/CMakeLists.txt
+++ b/src/coreclr/pal/CMakeLists.txt
@@ -2,6 +2,7 @@ project(COREPAL)

 include(../clrfeatures.cmake)

+include_directories(${RUNTIME_DIR})
 include_directories(${COREPAL_SOURCE_DIR}/inc)
 include_directories(${COREPAL_SOURCE_DIR}/src)
 include_directories(${COREPAL_SOURCE_DIR}/../inc)
diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
index 8201a03817f9c7..ead0d6b550d232 100644
--- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -376,3 +376,11 @@ C_FUNC(\Name):
 0:
 #endif
 .endm
+
+#include <riscv64/InlineTls.inc>
+
+// Inlined version of RhpGetThread
+.macro INLINE_GETTHREAD target
+    INLINE_GET_TLS_VAR \target, C_FUNC(t_CurrentThreadInfo)
+    ld \target, OFFSETOF__ThreadLocalInfo__m_pThread(\target)
+.endm
diff --git a/src/coreclr/runtime/riscv64/InlineTls.inc b/src/coreclr/runtime/riscv64/InlineTls.inc
new file mode 100644
index 00000000000000..a29f7386b80939
--- /dev/null
+++ b/src/coreclr/runtime/riscv64/InlineTls.inc
@@ -0,0 +1,72 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// Loads the address of a thread-local variable into the target register.
+// The target register cannot be a0.
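+//
+// The sequence below follows the global-dynamic TLS model: it spills ra, t1,
+// and a1..a7 because __tls_get_addr is an ordinary call that may clobber any
+// caller-saved register, while a0 carries the call's argument and result,
+// which is why a0 cannot be the target.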
+.macro INLINE_GET_TLS_VAR target, var, ofs = 0 + .ifc \target, a0 + .error "target cannot be a0" + .endif + + addi sp, sp, -72 + sd ra, 64(sp) + sd t1, 56(sp) + sd a1, 48(sp) + sd a2, 40(sp) + sd a3, 32(sp) + sd a4, 24(sp) + sd a5, 16(sp) + sd a6, 8(sp) + sd a7, 0(sp) + + // global dynamic TLS, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#global-dynamic + la.tls.gd a0, \var + call C_FUNC(__tls_get_addr) + + ld ra, 64(sp) + ld t1, 56(sp) + ld a1, 48(sp) + ld a2, 40(sp) + ld a3, 32(sp) + ld a4, 24(sp) + ld a5, 16(sp) + ld a6, 8(sp) + ld a7, 0(sp) + addi sp, sp, 72 + + add \target, a0, \ofs + + /* + In the future we should switch to TLS descriptors. Its support was added in 2024 in glibc, musl, llvm, gcc and binutils, + which is currently unavailable on majority devices. See https://maskray.me/blog/2024-01-23-riscv-tlsdesc-works + + When the support for TLS descriptors is available in NativeAOT baseline, actions to perform: + * Apply this patch: + ``` + --- a/src/coreclr/nativeaot/CMakeLists.txt + +++ b/src/coreclr/nativeaot/CMakeLists.txt + @@ -30,6 +30,8 @@ endif (CLR_CMAKE_HOST_UNIX) + + if(CLR_CMAKE_TARGET_ANDROID) + add_definitions(-DFEATURE_EMULATED_TLS) + +elseif(CLR_CMAKE_TARGET_ARCH_RISCV64) + + add_definitions(-mtls-dialect=desc) + endif() + + add_subdirectory(Bootstrap) + ``` + * Remove global dynamic code including prolog and epilog. + * Uncomment the following code and remove these comments. + + // TLS descriptor, see https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/eb2b2962/riscv-elf.adoc#tls-descriptors + auipc a0, %tlsdesc_hi(\var) + lw t0, %tlsdesc_load_lo(\var)(a0) + addi a0, a0, %tlsdesc_add_lo(\var) + jalr t0, 0(t0), %tlsdesc_call(\var) + add \target, tp, a0 + .ifnc \ofs, 0 + add \target, \target, \ofs + .endif + + */ +.endm diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 3535e5c14990af..5778018d548f22 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -1,7 +1,5 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) -set(RUNTIME_DIR ../runtime) - # Needed due to the munged files being in the binary folders, the set(CMAKE_INCLUDE_CURRENT_DIR ON) is not enough include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${ARCH_SOURCES_DIR}) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 8e474870ef7cfe..ce0c58a90fbd47 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1402,6 +1402,472 @@ PCODE FPRegs32LoadRoutines[] = #endif // TARGET_ARM64 +#ifdef TARGET_RISCV64 + +extern "C" void Load_A0(); +extern "C" void Load_A0_A1(); +extern "C" void Load_A0_A1_A2(); +extern "C" void Load_A0_A1_A2_A3(); +extern "C" void Load_A0_A1_A2_A3_A4(); +extern "C" void Load_A0_A1_A2_A3_A4_A5(); +extern "C" void Load_A0_A1_A2_A3_A4_A5_A6(); +extern "C" void Load_A0_A1_A2_A3_A4_A5_A6_A7(); +extern "C" void Load_A1(); +extern "C" void Load_A1_A2(); +extern "C" void Load_A1_A2_A3(); +extern "C" void Load_A1_A2_A3_A4(); +extern "C" void Load_A1_A2_A3_A4_A5(); +extern "C" void Load_A1_A2_A3_A4_A5_A6(); +extern "C" void Load_A1_A2_A3_A4_A5_A6_A7(); +extern "C" void Load_A2(); +extern "C" void Load_A2_A3(); +extern "C" void Load_A2_A3_A4(); +extern "C" void Load_A2_A3_A4_A5(); +extern "C" void Load_A2_A3_A4_A5_A6(); +extern "C" void Load_A2_A3_A4_A5_A6_A7(); +extern "C" void Load_A3(); +extern "C" void Load_A3_A4(); +extern "C" void Load_A3_A4_A5(); +extern "C" void Load_A3_A4_A5_A6(); +extern "C" void 
Load_A3_A4_A5_A6_A7(); +extern "C" void Load_A4(); +extern "C" void Load_A4_A5(); +extern "C" void Load_A4_A5_A6(); +extern "C" void Load_A4_A5_A6_A7(); +extern "C" void Load_A5(); +extern "C" void Load_A5_A6(); +extern "C" void Load_A5_A6_A7(); +extern "C" void Load_A6(); +extern "C" void Load_A6_A7(); +extern "C" void Load_A7(); + +extern "C" void Store_A0(); +extern "C" void Store_A0_A1(); +extern "C" void Store_A0_A1_A2(); +extern "C" void Store_A0_A1_A2_A3(); +extern "C" void Store_A0_A1_A2_A3_A4(); +extern "C" void Store_A0_A1_A2_A3_A4_A5(); +extern "C" void Store_A0_A1_A2_A3_A4_A5_A6(); +extern "C" void Store_A0_A1_A2_A3_A4_A5_A6_A7(); +extern "C" void Store_A1(); +extern "C" void Store_A1_A2(); +extern "C" void Store_A1_A2_A3(); +extern "C" void Store_A1_A2_A3_A4(); +extern "C" void Store_A1_A2_A3_A4_A5(); +extern "C" void Store_A1_A2_A3_A4_A5_A6(); +extern "C" void Store_A1_A2_A3_A4_A5_A6_A7(); +extern "C" void Store_A2(); +extern "C" void Store_A2_A3(); +extern "C" void Store_A2_A3_A4(); +extern "C" void Store_A2_A3_A4_A5(); +extern "C" void Store_A2_A3_A4_A5_A6(); +extern "C" void Store_A2_A3_A4_A5_A6_A7(); +extern "C" void Store_A3(); +extern "C" void Store_A3_A4(); +extern "C" void Store_A3_A4_A5(); +extern "C" void Store_A3_A4_A5_A6(); +extern "C" void Store_A3_A4_A5_A6_A7(); +extern "C" void Store_A4(); +extern "C" void Store_A4_A5(); +extern "C" void Store_A4_A5_A6(); +extern "C" void Store_A4_A5_A6_A7(); +extern "C" void Store_A5(); +extern "C" void Store_A5_A6(); +extern "C" void Store_A5_A6_A7(); +extern "C" void Store_A6(); +extern "C" void Store_A6_A7(); +extern "C" void Store_A7(); + +extern "C" void Load_Ref_A0(); +extern "C" void Load_Ref_A1(); +extern "C" void Load_Ref_A2(); +extern "C" void Load_Ref_A3(); +extern "C" void Load_Ref_A4(); +extern "C" void Load_Ref_A5(); +extern "C" void Load_Ref_A6(); +extern "C" void Load_Ref_A7(); + +extern "C" void Store_Ref_A0(); +extern "C" void Store_Ref_A1(); +extern "C" void Store_Ref_A2(); +extern "C" void Store_Ref_A3(); +extern "C" void Store_Ref_A4(); +extern "C" void Store_Ref_A5(); +extern "C" void Store_Ref_A6(); +extern "C" void Store_Ref_A7(); + +PCODE GPRegsRoutines[] = +{ + (PCODE)Load_A0, // 00 + (PCODE)Load_A0_A1, // 01 + (PCODE)Load_A0_A1_A2, // 02 + (PCODE)Load_A0_A1_A2_A3, // 03 + (PCODE)Load_A0_A1_A2_A3_A4, // 04 + (PCODE)Load_A0_A1_A2_A3_A4_A5, // 05 + (PCODE)Load_A0_A1_A2_A3_A4_A5_A6, // 06 + (PCODE)Load_A0_A1_A2_A3_A4_A5_A6_A7, // 07 + (PCODE)0, // 10 + (PCODE)Load_A1, // 11 + (PCODE)Load_A1_A2, // 12 + (PCODE)Load_A1_A2_A3, // 13 + (PCODE)Load_A1_A2_A3_A4, // 14 + (PCODE)Load_A1_A2_A3_A4_A5, // 15 + (PCODE)Load_A1_A2_A3_A4_A5_A6, // 16 + (PCODE)Load_A1_A2_A3_A4_A5_A6_A7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_A2, // 22 + (PCODE)Load_A2_A3, // 23 + (PCODE)Load_A2_A3_A4, // 24 + (PCODE)Load_A2_A3_A4_A5, // 25 + (PCODE)Load_A2_A3_A4_A5_A6, // 26 + (PCODE)Load_A2_A3_A4_A5_A6_A7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_A3, // 33 + (PCODE)Load_A3_A4, // 34 + (PCODE)Load_A3_A4_A5, // 35 + (PCODE)Load_A3_A4_A5_A6, // 36 + (PCODE)Load_A3_A4_A5_A6_A7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_A4, // 44 + (PCODE)Load_A4_A5, // 45 + (PCODE)Load_A4_A5_A6, // 46 + (PCODE)Load_A4_A5_A6_A7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_A5, // 55 + (PCODE)Load_A5_A6, // 56 + (PCODE)Load_A5_A6_A7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + 
(PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Load_A6, // 66 + (PCODE)Load_A6_A7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Load_A7 // 77 +}; + +PCODE GPRegsStoreRoutines[] = +{ + (PCODE)Store_A0, // 00 + (PCODE)Store_A0_A1, // 01 + (PCODE)Store_A0_A1_A2, // 02 + (PCODE)Store_A0_A1_A2_A3, // 03 + (PCODE)Store_A0_A1_A2_A3_A4, // 04 + (PCODE)Store_A0_A1_A2_A3_A4_A5, // 05 + (PCODE)Store_A0_A1_A2_A3_A4_A5_A6, // 06 + (PCODE)Store_A0_A1_A2_A3_A4_A5_A6_A7, // 07 + (PCODE)0, // 10 + (PCODE)Store_A1, // 11 + (PCODE)Store_A1_A2, // 12 + (PCODE)Store_A1_A2_A3, // 13 + (PCODE)Store_A1_A2_A3_A4, // 14 + (PCODE)Store_A1_A2_A3_A4_A5, // 15 + (PCODE)Store_A1_A2_A3_A4_A5_A6, // 16 + (PCODE)Store_A1_A2_A3_A4_A5_A6_A7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_A2, // 22 + (PCODE)Store_A2_A3, // 23 + (PCODE)Store_A2_A3_A4, // 24 + (PCODE)Store_A2_A3_A4_A5, // 25 + (PCODE)Store_A2_A3_A4_A5_A6, // 26 + (PCODE)Store_A2_A3_A4_A5_A6_A7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_A3, // 33 + (PCODE)Store_A3_A4, // 34 + (PCODE)Store_A3_A4_A5, // 35 + (PCODE)Store_A3_A4_A5_A6, // 36 + (PCODE)Store_A3_A4_A5_A6_A7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_A4, // 44 + (PCODE)Store_A4_A5, // 45 + (PCODE)Store_A4_A5_A6, // 46 + (PCODE)Store_A4_A5_A6_A7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_A5, // 55 + (PCODE)Store_A5_A6, // 56 + (PCODE)Store_A5_A6_A7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Store_A6, // 66 + (PCODE)Store_A6_A7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Store_A7 // 77 +}; + +extern "C" void Load_FA0(); +extern "C" void Load_FA0_FA1(); +extern "C" void Load_FA0_FA1_FA2(); +extern "C" void Load_FA0_FA1_FA2_FA3(); +extern "C" void Load_FA0_FA1_FA2_FA3_FA4(); +extern "C" void Load_FA0_FA1_FA2_FA3_FA4_FA5(); +extern "C" void Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Load_FA1(); +extern "C" void Load_FA1_FA2(); +extern "C" void Load_FA1_FA2_FA3(); +extern "C" void Load_FA1_FA2_FA3_FA4(); +extern "C" void Load_FA1_FA2_FA3_FA4_FA5(); +extern "C" void Load_FA1_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Load_FA2(); +extern "C" void Load_FA2_FA3(); +extern "C" void Load_FA2_FA3_FA4(); +extern "C" void Load_FA2_FA3_FA4_FA5(); +extern "C" void Load_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Load_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Load_FA3(); +extern "C" void Load_FA3_FA4(); +extern "C" void Load_FA3_FA4_FA5(); +extern "C" void Load_FA3_FA4_FA5_FA6(); +extern "C" void Load_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Load_FA4(); +extern "C" void Load_FA4_FA5(); +extern "C" void Load_FA4_FA5_FA6(); +extern "C" void Load_FA4_FA5_FA6_FA7(); +extern "C" void Load_FA5(); +extern "C" void Load_FA5_FA6(); +extern "C" void Load_FA5_FA6_FA7(); +extern "C" void Load_FA6(); +extern "C" void Load_FA6_FA7(); +extern "C" void Load_FA7(); + +extern "C" void Store_FA0(); +extern "C" void Store_FA0_FA1(); +extern "C" void Store_FA0_FA1_FA2(); +extern "C" void Store_FA0_FA1_FA2_FA3(); +extern "C" void Store_FA0_FA1_FA2_FA3_FA4(); +extern "C" void 
Store_FA0_FA1_FA2_FA3_FA4_FA5(); +extern "C" void Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Store_FA1(); +extern "C" void Store_FA1_FA2(); +extern "C" void Store_FA1_FA2_FA3(); +extern "C" void Store_FA1_FA2_FA3_FA4(); +extern "C" void Store_FA1_FA2_FA3_FA4_FA5(); +extern "C" void Store_FA1_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Store_FA2(); +extern "C" void Store_FA2_FA3(); +extern "C" void Store_FA2_FA3_FA4(); +extern "C" void Store_FA2_FA3_FA4_FA5(); +extern "C" void Store_FA2_FA3_FA4_FA5_FA6(); +extern "C" void Store_FA2_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Store_FA3(); +extern "C" void Store_FA3_FA4(); +extern "C" void Store_FA3_FA4_FA5(); +extern "C" void Store_FA3_FA4_FA5_FA6(); +extern "C" void Store_FA3_FA4_FA5_FA6_FA7(); +extern "C" void Store_FA4(); +extern "C" void Store_FA4_FA5(); +extern "C" void Store_FA4_FA5_FA6(); +extern "C" void Store_FA4_FA5_FA6_FA7(); +extern "C" void Store_FA5(); +extern "C" void Store_FA5_FA6(); +extern "C" void Store_FA5_FA6_FA7(); +extern "C" void Store_FA6(); +extern "C" void Store_FA6_FA7(); +extern "C" void Store_FA7(); + +PCODE FPRegsRoutines[] = +{ + (PCODE)Load_FA0, // 00 + (PCODE)Load_FA0_FA1, // 01 + (PCODE)Load_FA0_FA1_FA2, // 02 + (PCODE)Load_FA0_FA1_FA2_FA3, // 03 + (PCODE)Load_FA0_FA1_FA2_FA3_FA4, // 04 + (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5, // 05 + (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6, // 06 + (PCODE)Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7, // 07 + (PCODE)0, // 10 + (PCODE)Load_FA1, // 11 + (PCODE)Load_FA1_FA2, // 12 + (PCODE)Load_FA1_FA2_FA3, // 13 + (PCODE)Load_FA1_FA2_FA3_FA4, // 14 + (PCODE)Load_FA1_FA2_FA3_FA4_FA5, // 15 + (PCODE)Load_FA1_FA2_FA3_FA4_FA5_FA6, // 16 + (PCODE)Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_FA2, // 22 + (PCODE)Load_FA2_FA3, // 23 + (PCODE)Load_FA2_FA3_FA4, // 24 + (PCODE)Load_FA2_FA3_FA4_FA5, // 25 + (PCODE)Load_FA2_FA3_FA4_FA5_FA6, // 26 + (PCODE)Load_FA2_FA3_FA4_FA5_FA6_FA7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_FA3, // 33 + (PCODE)Load_FA3_FA4, // 34 + (PCODE)Load_FA3_FA4_FA5, // 35 + (PCODE)Load_FA3_FA4_FA5_FA6, // 36 + (PCODE)Load_FA3_FA4_FA5_FA6_FA7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_FA4, // 44 + (PCODE)Load_FA4_FA5, // 45 + (PCODE)Load_FA4_FA5_FA6, // 46 + (PCODE)Load_FA4_FA5_FA6_FA7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_FA5, // 55 + (PCODE)Load_FA5_FA6, // 56 + (PCODE)Load_FA5_FA6_FA7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Load_FA6, // 66 + (PCODE)Load_FA6_FA7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Load_FA7 // 77 +}; + +PCODE FPRegsStoreRoutines[] = +{ + (PCODE)Store_FA0, // 00 + (PCODE)Store_FA0_FA1, // 01 + (PCODE)Store_FA0_FA1_FA2, // 02 + (PCODE)Store_FA0_FA1_FA2_FA3, // 03 + (PCODE)Store_FA0_FA1_FA2_FA3_FA4, // 04 + (PCODE)Store_FA0_FA1_FA2_FA3_FA4_FA5, // 05 + (PCODE)Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6, // 06 + (PCODE)Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7, // 07 + (PCODE)0, // 10 + (PCODE)Store_FA1, // 11 + (PCODE)Store_FA1_FA2, // 12 + (PCODE)Store_FA1_FA2_FA3, // 13 + (PCODE)Store_FA1_FA2_FA3_FA4, // 14 + (PCODE)Store_FA1_FA2_FA3_FA4_FA5, // 15 + 
(PCODE)Store_FA1_FA2_FA3_FA4_FA5_FA6, // 16 + (PCODE)Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Store_FA2, // 22 + (PCODE)Store_FA2_FA3, // 23 + (PCODE)Store_FA2_FA3_FA4, // 24 + (PCODE)Store_FA2_FA3_FA4_FA5, // 25 + (PCODE)Store_FA2_FA3_FA4_FA5_FA6, // 26 + (PCODE)Store_FA2_FA3_FA4_FA5_FA6_FA7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Store_FA3, // 33 + (PCODE)Store_FA3_FA4, // 34 + (PCODE)Store_FA3_FA4_FA5, // 35 + (PCODE)Store_FA3_FA4_FA5_FA6, // 36 + (PCODE)Store_FA3_FA4_FA5_FA6_FA7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Store_FA4, // 44 + (PCODE)Store_FA4_FA5, // 45 + (PCODE)Store_FA4_FA5_FA6, // 46 + (PCODE)Store_FA4_FA5_FA6_FA7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Store_FA5, // 55 + (PCODE)Store_FA5_FA6, // 56 + (PCODE)Store_FA5_FA6_FA7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Store_FA6, // 66 + (PCODE)Store_FA6_FA7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Store_FA7 // 77 +}; + +PCODE GPRegsRefRoutines[] = +{ + (PCODE)Load_Ref_A0, // 0 - a0 + (PCODE)Load_Ref_A1, // 1 - a1 + (PCODE)Load_Ref_A2, // 2 - a2 + (PCODE)Load_Ref_A3, // 3 - a3 + (PCODE)Load_Ref_A4, // 4 - a4 + (PCODE)Load_Ref_A5, // 5 - a5 + (PCODE)Load_Ref_A6, // 6 - a6 + (PCODE)Load_Ref_A7 // 7 - a7 +}; + +PCODE GPRegsRefStoreRoutines[] = +{ + (PCODE)Store_Ref_A0, // 0 - a0 + (PCODE)Store_Ref_A1, // 1 - a1 + (PCODE)Store_Ref_A2, // 2 - a2 + (PCODE)Store_Ref_A3, // 3 - a3 + (PCODE)Store_Ref_A4, // 4 - a4 + (PCODE)Store_Ref_A5, // 5 - a5 + (PCODE)Store_Ref_A6, // 6 - a6 + (PCODE)Store_Ref_A7 // 7 - a7 +}; + +#endif // TARGET_RISCV64 + #define LOG_COMPUTE_CALL_STUB 0 PCODE CallStubGenerator::GetStackRoutine() @@ -1565,6 +2031,17 @@ extern "C" void InterpreterStubRet3Vector128(); extern "C" void InterpreterStubRet4Vector128(); #endif // TARGET_ARM64 +#if defined(TARGET_RISCV64) +extern "C" void CallJittedMethodRet2I8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet2Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetFloatInt(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetIntFloat(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRet2I8(); +extern "C" void InterpreterStubRet2Double(); +extern "C" void InterpreterStubRetFloatInt(); +extern "C" void InterpreterStubRetIntFloat(); +#endif // TARGET_RISCV64 + #if LOG_COMPUTE_CALL_STUB #define INVOKE_FUNCTION_PTR(functionPtrName) printf(#functionPtrName "\n"); return functionPtrName #else @@ -1636,6 +2113,16 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt case ReturnType4Vector128: INVOKE_FUNCTION_PTR(CallJittedMethodRet4Vector128); #endif // TARGET_ARM64 +#if defined(TARGET_RISCV64) + case ReturnType2I8: + INVOKE_FUNCTION_PTR(CallJittedMethodRet2I8); + case ReturnType2Double: + INVOKE_FUNCTION_PTR(CallJittedMethodRet2Double); + case ReturnTypeFloatInt: + INVOKE_FUNCTION_PTR(CallJittedMethodRetFloatInt); + case ReturnTypeIntFloat: + INVOKE_FUNCTION_PTR(CallJittedMethodRetIntFloat); +#endif // TARGET_RISCV64 default: _ASSERTE(!"Unexpected return type for interpreter stub"); 
        return NULL; // This should never happen, but just in case.
@@ -1713,6 +2200,16 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu
     case ReturnType4Vector128:
         RETURN_TYPE_HANDLER(InterpreterStubRet4Vector128);
 #endif // TARGET_ARM64
+#if defined(TARGET_RISCV64)
+    case ReturnType2I8:
+        RETURN_TYPE_HANDLER(InterpreterStubRet2I8);
+    case ReturnType2Double:
+        RETURN_TYPE_HANDLER(InterpreterStubRet2Double);
+    case ReturnTypeFloatInt:
+        RETURN_TYPE_HANDLER(InterpreterStubRetFloatInt);
+    case ReturnTypeIntFloat:
+        RETURN_TYPE_HANDLER(InterpreterStubRetIntFloat);
+#endif // TARGET_RISCV64
     default:
         _ASSERTE(!"Unexpected return type for interpreter stub");
         return 0; // This should never happen, but just in case.
@@ -2484,6 +2981,49 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg
             _ASSERTE(!"The return types that are not HFA should be <= 16 bytes in size");
         }
     }
+#elif defined(TARGET_RISCV64)
+    {
+        FpStructInRegistersInfo info = pArgIt->GetReturnFpStructInRegistersInfo();
+        // RISC-V passes floating-point struct fields in FA registers
+        if ((info.flags & FpStruct::OnlyOne) != 0)
+        {
+            // Single field - could be float or int in a single register
+            return ReturnTypeDouble; // Use Double routine for both float and double (NaN-boxed)
+        }
+        else if ((info.flags & FpStruct::BothFloat) != 0)
+        {
+            // Two float/double fields
+            return ReturnType2Double;
+        }
+        else if ((info.flags & FpStruct::FloatInt) != 0)
+        {
+            // First field float, second int
+            return ReturnTypeFloatInt;
+        }
+        else if ((info.flags & FpStruct::IntFloat) != 0)
+        {
+            // First field int, second float
+            return ReturnTypeIntFloat;
+        }
+        else
+        {
+            _ASSERTE(info.flags == FpStruct::UseIntCallConv);
+            _ASSERTE(thReturnValueType.AsMethodTable()->IsRegPassedStruct());
+            unsigned size = thReturnValueType.GetSize();
+            if (size <= 8)
+            {
+                return ReturnTypeI8;
+            }
+            else if (size <= 16)
+            {
+                return ReturnType2I8;
+            }
+            else
+            {
+                _ASSERTE(!"Struct returns should be <= 16 bytes in size");
+            }
+        }
+    }
 #else
     _ASSERTE(!"Struct returns by value are not supported yet");
 #endif
diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h
index 59fd8051d0c0b3..da686ff9a76cec 100644
--- a/src/coreclr/vm/callstubgenerator.h
+++ b/src/coreclr/vm/callstubgenerator.h
@@ -98,8 +98,14 @@ class CallStubGenerator
         ReturnTypeVector128,
         ReturnType2Vector128,
         ReturnType3Vector128,
-        ReturnType4Vector128
+        ReturnType4Vector128,
 #endif // TARGET_ARM64
+#if defined(TARGET_RISCV64)
+        ReturnType2I8,
+        ReturnType2Double,
+        ReturnTypeFloatInt,
+        ReturnTypeIntFloat,
+#endif // TARGET_RISCV64
     };

     enum class RoutineType
diff --git a/src/coreclr/vm/riscv64/asmconstants.h b/src/coreclr/vm/riscv64/asmconstants.h
index 8db47c768a2f9a..d49e9b32b046dc 100644
--- a/src/coreclr/vm/riscv64/asmconstants.h
+++ b/src/coreclr/vm/riscv64/asmconstants.h
@@ -231,6 +231,31 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCountingStubData, TargetForMethod))
 ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached))
 #endif // FEATURE_TIERED_COMPILATION

+#define OFFSETOF__ThreadLocalInfo__m_pThread 0
+ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLocalInfo, m_pThread))
+
+#ifdef FEATURE_INTERPRETER
+#ifdef _DEBUG
+#define OFFSETOF__InterpMethod__pCallStub 0x20
+#else
+#define OFFSETOF__InterpMethod__pCallStub 0x20
+#endif
+ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub))
+
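The asmconstants block above hard-codes each offset the assembly stubs need and immediately pins it to the real C++ layout, so a field move breaks the build instead of silently corrupting interpreter state at run time. A hedged sketch of the pattern with a stand-in type (illustrative names, 64-bit layout assumed):

```
// Hedged sketch of the asmconstants pattern: hard-code the offset the
// assembler needs, then verify it against the C++ layout at compile time.
#include <cstddef>

struct InterpThreadContextSketch        // stand-in type, not the runtime's
{
    void*  frameData[2];                // hypothetical fields at 0x00..0x0F
    void** pStackPointer;               // field the stubs load at offset 0x10
};

#define OFFSETOF__Sketch__pStackPointer 0x10
static_assert(OFFSETOF__Sketch__pStackPointer ==
                  offsetof(InterpThreadContextSketch, pStackPointer),
              "asm constant out of sync with the C++ layout");
```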
+#define OFFSETOF__Thread__m_pInterpThreadContext 0xB30 +ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext)) + +#define OFFSETOF__InterpThreadContext__pStackPointer 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpThreadContext__pStackPointer == offsetof(InterpThreadContext, pStackPointer)) + +#define OFFSETOF__CallStubHeader__Routines 0x10 +ASMCONSTANTS_C_ASSERT(OFFSETOF__CallStubHeader__Routines == offsetof(CallStubHeader, Routines)) + +#define SIZEOF__TransitionBlock 0xC0 +ASMCONSTANTS_C_ASSERT(SIZEOF__TransitionBlock == sizeof(TransitionBlock)) + +#endif // FEATURE_INTERPRETER + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 1 #define PROFILE_LEAVE 2 diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 7e2db0a99a69f9..823e5a1517c1f1 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -873,3 +873,1511 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT mv a1, t6 // Move temp register to first arg register for static method with return buffer EPILOG_BRANCH_REG t2 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + +#ifdef FEATURE_INTERPRETER + +// Align interpreter stack by adjusting it by 8 bytes +LEAF_ENTRY InjectInterpStackAlign + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + jr t4 +LEAF_END InjectInterpStackAlign + +// Copy arguments from the interpreter stack to the processor stack +// The CPU stack slots are aligned to pointer size. +LEAF_ENTRY Load_Stack + lwu t4, 0(t2) // SP offset + lwu t5, 4(t2) // number of stack slots + addi t2, t2, 8 + add t4, sp, t4 + addi t5, t5, -8 + bltz t5, LOCAL_LABEL(CopyBy1) +LOCAL_LABEL(CopyLoop): + ld t6, 0(t3) + sd t6, 0(t4) + addi t3, t3, 8 + addi t4, t4, 8 + addi t5, t5, -8 + bgez t5, LOCAL_LABEL(CopyLoop) +LOCAL_LABEL(CopyBy1): + addi t5, t5, 8 + addi t5, t5, -1 + bltz t5, LOCAL_LABEL(Done) +LOCAL_LABEL(CopyLoop1): + lbu t6, 0(t3) + sb t6, 0(t4) + addi t3, t3, 1 + addi t4, t4, 1 + addi t5, t5, -1 + bgez t5, LOCAL_LABEL(CopyLoop1) +LOCAL_LABEL(Done): + // Align t3 to the stack slot size + addi t3, t3, 7 + andi t3, t3, -8 + ld t4, 0(t2) + addi t2, t2, 8 + jr t4 +LEAF_END Load_Stack + +// Load/Store stack reference routines (placeholders for GC tracking) +LEAF_ENTRY Load_Stack_Ref + lwu t4, 0(t2) // SP offset (zero-extend) + lwu t5, 4(t2) // size of the value type (zero-extend) + add t4, sp, t4 + sd t3, 0(t4) + add t3, t3, t5 + // Align t3 to the stack slot size + addi t3, t3, 7 + andi t3, t3, -8 + ld t4, 8(t2) // Next routine pointer (aligned) + addi t2, t2, 16 + jr t4 +LEAF_END Load_Stack_Ref + +LEAF_ENTRY Store_Stack_Ref + lwu t5, 0(t2) // SP offset (zero-extend) + lwu t4, 4(t2) // size of the value type (zero-extend) + add t5, sp, t5 + // Split large immediate into separate additions to avoid 12-bit limit + addi t5, t5, __PWTB_TransitionBlock + addi t5, t5, SIZEOF__TransitionBlock + ld t5, 0(t5) // t5 = pointer to source data from native stack + // Copy the data from native stack to interpreter stack + // Copy 8 bytes at a time if possible + addi t4, t4, -8 + bltz t4, LOCAL_LABEL(StoreRefCopyBy1) +LOCAL_LABEL(StoreRefCopyLoop8): + ld t6, 0(t5) + sd t6, 0(t3) + addi t5, t5, 8 + addi t3, t3, 8 + addi t4, t4, -8 + bgez t4, LOCAL_LABEL(StoreRefCopyLoop8) +LOCAL_LABEL(StoreRefCopyBy1): + // Copy remaining bytes (0-7) + addi t4, t4, 8 + beqz t4, LOCAL_LABEL(StoreRefCopyDone) +LOCAL_LABEL(StoreRefCopyLoop1): + lbu t6, 0(t5) + sb t6, 0(t3) + addi t5, t5, 1 + addi t3, t3, 1 + addi t4, t4, -1 + bnez t4, 
LOCAL_LABEL(StoreRefCopyLoop1) +LOCAL_LABEL(StoreRefCopyDone): + // Align t3 to the stack slot size + addi t3, t3, 7 + andi t3, t3, -8 + ld t4, 8(t2) // Next routine pointer (aligned) + addi t2, t2, 16 + jr t4 +LEAF_END Store_Stack_Ref + +// Macro for copying value types by reference +// Arguments: +// argReg - source register containing pointer to value type +// t4 - size of the value type (in bytes) +// t3 - destination pointer (interpreter stack) +.macro Copy_Ref argReg + // Copy 8 bytes at a time if possible + addi t4, t4, -8 + bltz t4, LOCAL_LABEL(CopyBy1\argReg) +LOCAL_LABEL(RefCopyLoop8\argReg): + ld t6, 0(\argReg) + sd t6, 0(t3) + addi \argReg, \argReg, 8 + addi t3, t3, 8 + addi t4, t4, -8 + bgez t4, LOCAL_LABEL(RefCopyLoop8\argReg) +LOCAL_LABEL(CopyBy1\argReg): + // Copy remaining bytes (0-7) + addi t4, t4, 8 + beqz t4, LOCAL_LABEL(RefCopyDone\argReg) +LOCAL_LABEL(RefCopyLoop1\argReg): + lbu t6, 0(\argReg) + sb t6, 0(t3) + addi \argReg, \argReg, 1 + addi t3, t3, 1 + addi t4, t4, -1 + bnez t4, LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyDone\argReg): + // Align t3 to the stack slot size + addi t3, t3, 7 + andi t3, t3, -8 +.endm + +// Routines for passing value type arguments by reference in general purpose registers A0..A7 +// from native code to the interpreter (Store direction) + +.macro Store_Ref argReg, argRegLower +LEAF_ENTRY Store_Ref_\argReg + ld t4, 0(t2) // size of the value type + addi t2, t2, 8 + Copy_Ref \argRegLower + ld t4, 0(t2) + addi t2, t2, 8 + jr t4 +LEAF_END Store_Ref_\argReg +.endm + +Store_Ref A0, a0 +Store_Ref A1, a1 +Store_Ref A2, a2 +Store_Ref A3, a3 +Store_Ref A4, a4 +Store_Ref A5, a5 +Store_Ref A6, a6 +Store_Ref A7, a7 + +// Routines for loading value type arguments by reference from interpreter stack +// to general purpose registers A0..A7 (Load direction - interpreter to native) + +.macro Load_Ref argReg, argRegLower +LEAF_ENTRY Load_Ref_\argReg + ld t4, 0(t2) // size of the value type + addi t2, t2, 8 + mv \argRegLower, t3 // Pass pointer to the value type in register + add t3, t3, t4 // Advance interpreter stack pointer + // Align t3 to the stack slot size + addi t3, t3, 7 + andi t3, t3, -8 + ld t4, 0(t2) + addi t2, t2, 8 + jr t4 +LEAF_END Load_Ref_\argReg +.endm + +Load_Ref A0, a0 +Load_Ref A1, a1 +Load_Ref A2, a2 +Load_Ref A3, a3 +Load_Ref A4, a4 +Load_Ref A5, a5 +Load_Ref A6, a6 +Load_Ref A7, a7 + +// Call jitted method routines +// a0 - routines array +// a1 - interpreter stack args location +// a2 - interpreter stack return value location +// a3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16 + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RETURN +NESTED_END CallJittedMethodRetVoid, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16 + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + mv a0, a2 // buffer return pointer goes in a0 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RETURN +NESTED_END CallJittedMethodRetBuff, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + sd a0, 0(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + 
EPILOG_RETURN +NESTED_END CallJittedMethodRetI8, _TEXT + +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + fsd fa0, 0(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN +NESTED_END CallJittedMethodRetDouble, _TEXT + +// a0 - routines array +// a1 - interpreter stack args location +// a2 - interpreter stack return value location +// a3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet2I8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + sd a0, 0(a2) + sd a1, 8(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN +NESTED_END CallJittedMethodRet2I8, _TEXT + +// a0 - routines array +// a1 - interpreter stack args location +// a2 - interpreter stack return value location +// a3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet2Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + fsd fa0, 0(a2) + fsd fa1, 8(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN +NESTED_END CallJittedMethodRet2Double, _TEXT + +// a0 - routines array +// a1 - interpreter stack args location +// a2 - interpreter stack return value location +// a3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetFloatInt, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + fsd fa0, 0(a2) + sd a0, 8(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN +NESTED_END CallJittedMethodRetFloatInt, _TEXT + +// a0 - routines array +// a1 - interpreter stack args location +// a2 - interpreter stack return value location +// a3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetIntFloat, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -32 + sd a2, 16(fp) + sub sp, sp, a3 + mv t2, a0 + mv t3, a1 + ld t4, 0(t2) + addi t2, t2, 8 + jalr t4 + ld a2, 16(fp) + sd a0, 0(a2) + fsd fa0, 8(a2) + EPILOG_STACK_RESTORE + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RETURN +NESTED_END CallJittedMethodRetIntFloat, _TEXT + +NESTED_ENTRY InterpreterStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + // IR bytecode address + mv t6, METHODDESC_REGISTER + + INLINE_GETTHREAD t5 // thrashes a0 + beqz t5, LOCAL_LABEL(NoManagedThreadOrCallStub) + + li t1, OFFSETOF__Thread__m_pInterpThreadContext + add t1, t5, t1 + ld t4, 0(t1) + bnez t4, LOCAL_LABEL(HaveInterpThreadContext) + +LOCAL_LABEL(NoManagedThreadOrCallStub): + addi a0, sp, __PWTB_TransitionBlock + 16 + mv a1, t6 + call C_FUNC(GetInterpThreadContextWithPossiblyMissingThreadOrCallStub) + mv t4, a0 + +LOCAL_LABEL(HaveInterpThreadContext): + + RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + + ld t3, 0(t6) // InterpMethod* + ld t3, OFFSETOF__InterpMethod__pCallStub(t3) + beqz t3, LOCAL_LABEL(NoManagedThreadOrCallStub) + addi t2, t3, OFFSETOF__CallStubHeader__Routines + ld t3, OFFSETOF__InterpThreadContext__pStackPointer(t4) + // t6 
contains IR bytecode address
+    // Copy the arguments to the interpreter stack, invoke the InterpExecMethod and load the return value
+    ld t4, 0(t2)
+    addi t2, t2, 8
+    jalr t4
+
+    EPILOG_WITH_TRANSITION_BLOCK_RETURN
+
+NESTED_END InterpreterStub, _TEXT
+
+NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetVoid, _TEXT
+
+NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    ld a0, 0(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetI8, _TEXT
+
+NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    fld fa0, 0(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetDouble, _TEXT
+
+NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    mv a2, a0 // save caller's return buffer in a2
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    call C_FUNC(ExecuteInterpretedMethod)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetBuff, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2I8, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    ld a1, 8(a0)
+    ld a0, 0(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2I8, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2Double, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    fld fa0, 0(a0)
+    fld fa1, 8(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2Double, _TEXT
+
+NESTED_ENTRY InterpreterStubRetFloat, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    flw fa0, 0(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRetFloat, _TEXT
+
+NESTED_ENTRY InterpreterStubRet2Float, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16
+    // The +16 is for the fp, ra above
+    addi a0, sp, __PWTB_TransitionBlock + 16
+    mv a1, t6 // the IR bytecode pointer
+    mv a2, zero
+    call C_FUNC(ExecuteInterpretedMethod)
+    flw fa0, 0(a0)
+    flw fa1, 4(a0)
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16
+    EPILOG_RETURN
+NESTED_END InterpreterStubRet2Float, _TEXT
+
+NESTED_ENTRY InterpreterStubRetFloatInt, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp,
ra, -16 + // The +16 is for the fp, ra above + addi a0, sp, __PWTB_TransitionBlock + 16 + mv a1, t6 // the IR bytecode pointer + mv a2, zero + call C_FUNC(ExecuteInterpretedMethod) + fld fa0, 0(a0) + ld a0, 8(a0) + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetFloatInt, _TEXT + +NESTED_ENTRY InterpreterStubRetIntFloat, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, -16 + // The +16 is for the fp, ra above + addi a0, sp, __PWTB_TransitionBlock + 16 + mv a1, t6 // the IR bytecode pointer + mv a2, zero + call C_FUNC(ExecuteInterpretedMethod) + ld a1, 0(a0) + fld fa0, 8(a0) + mv a0, a1 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RETURN +NESTED_END InterpreterStubRetIntFloat, _TEXT + +// Copy arguments from the processor stack to the interpreter stack +// The CPU stack slots are aligned to pointer size. + +LEAF_ENTRY Store_Stack + lwu t4, 0(t2) // SP offset + lwu t5, 4(t2) // size (multiple of stack slot size) + add t4, sp, t4 + // Split large immediate into separate additions to avoid 12-bit limit + addi t4, t4, __PWTB_TransitionBlock + addi t4, t4, SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop): + ld t6, 0(t4) + sd t6, 0(t3) + addi t4, t4, 8 + addi t3, t3, 8 + addi t5, t5, -8 + bnez t5, LOCAL_LABEL(StoreCopyLoop) + ld t4, 8(t2) + addi t2, t2, 16 + EPILOG_BRANCH_REG t4 +LEAF_END Store_Stack + +// Load/Store register routines +// t2 = routine pointer, t3 = interpreter stack pointer +// Pattern: Load from interpreter stack to argument registers, then jump to next routine + +LEAF_ENTRY Load_A0 + ld a0, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0 + +LEAF_ENTRY Load_A0_A1 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1 + +LEAF_ENTRY Load_A0_A1_A2 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2 + ld a2, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2 + +LEAF_ENTRY Load_A0_A1_A2_A3 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2_A3 + ld a2, 0(t3) + ld a3, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2_A3 + +LEAF_ENTRY Load_A0_A1_A2_A3_A4 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2_A3_A4 + ld a2, 0(t3) + ld a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A4 + ld a4, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2_A3_A4 + +LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2_A3_A4_A5 + ld a2, 0(t3) + ld a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A4_A5 + ld a4, 0(t3) + ld a5, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2_A3_A4_A5 + +LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5_A6 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2_A3_A4_A5_A6 + ld a2, 0(t3) + ld a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A4_A5_A6 + ld a4, 0(t3) + ld a5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A6 + ld a6, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2_A3_A4_A5_A6 + +LEAF_ENTRY Load_A0_A1_A2_A3_A4_A5_A6_A7 + ld a0, 0(t3) + ld a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A2_A3_A4_A5_A6_A7 + ld a2, 0(t3) + ld a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A4_A5_A6_A7 + ld a4, 0(t3) + ld a5, 
8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A6_A7 + ld a6, 0(t3) + ld a7, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A0_A1_A2_A3_A4_A5_A6_A7 + +LEAF_ENTRY Load_A1 + ld a1, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1 + +LEAF_ENTRY Load_A1_A2 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2 + +LEAF_ENTRY Load_A1_A2_A3 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A3 + ld a3, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2_A3 + +LEAF_ENTRY Load_A1_A2_A3_A4 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A3_A4 + ld a3, 0(t3) + ld a4, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2_A3_A4 + +LEAF_ENTRY Load_A1_A2_A3_A4_A5 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A3_A4_A5 + ld a3, 0(t3) + ld a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A5 + ld a5, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2_A3_A4_A5 + +LEAF_ENTRY Load_A1_A2_A3_A4_A5_A6 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A3_A4_A5_A6 + ld a3, 0(t3) + ld a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A5_A6 + ld a5, 0(t3) + ld a6, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2_A3_A4_A5_A6 + +LEAF_ENTRY Load_A1_A2_A3_A4_A5_A6_A7 + ld a1, 0(t3) + ld a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A3_A4_A5_A6_A7 + ld a3, 0(t3) + ld a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A5_A6_A7 + ld a5, 0(t3) + ld a6, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_A7 + ld a7, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_A1_A2_A3_A4_A5_A6_A7 + +// Store routines: Store argument registers to interpreter stack + +LEAF_ENTRY Store_A0 + sd a0, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0 + +LEAF_ENTRY Store_A0_A1 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1 + +LEAF_ENTRY Store_A0_A1_A2 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2 + sd a2, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2 + +LEAF_ENTRY Store_A0_A1_A2_A3 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2_A3 + sd a2, 0(t3) + sd a3, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2_A3 + +LEAF_ENTRY Store_A0_A1_A2_A3_A4 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2_A3_A4 + sd a2, 0(t3) + sd a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A4 + sd a4, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2_A3_A4 + +LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2_A3_A4_A5 + sd a2, 0(t3) + sd a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A4_A5 + sd a4, 0(t3) + sd a5, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2_A3_A4_A5 + +LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5_A6 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2_A3_A4_A5_A6 + sd a2, 0(t3) + sd a3, 8(t3) 
+ addi t3, t3, 16 +ALTERNATE_ENTRY Store_A4_A5_A6 + sd a4, 0(t3) + sd a5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A6 + sd a6, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2_A3_A4_A5_A6 + +LEAF_ENTRY Store_A0_A1_A2_A3_A4_A5_A6_A7 + sd a0, 0(t3) + sd a1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A2_A3_A4_A5_A6_A7 + sd a2, 0(t3) + sd a3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A4_A5_A6_A7 + sd a4, 0(t3) + sd a5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A6_A7 + sd a6, 0(t3) + sd a7, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A0_A1_A2_A3_A4_A5_A6_A7 + +LEAF_ENTRY Store_A1 + sd a1, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1 + +LEAF_ENTRY Store_A1_A2 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2 + +LEAF_ENTRY Store_A1_A2_A3 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A3 + sd a3, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2_A3 + +LEAF_ENTRY Store_A1_A2_A3_A4 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A3_A4 + sd a3, 0(t3) + sd a4, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2_A3_A4 + +LEAF_ENTRY Store_A1_A2_A3_A4_A5 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A3_A4_A5 + sd a3, 0(t3) + sd a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A5 + sd a5, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2_A3_A4_A5 + +LEAF_ENTRY Store_A1_A2_A3_A4_A5_A6 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A3_A4_A5_A6 + sd a3, 0(t3) + sd a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A5_A6 + sd a5, 0(t3) + sd a6, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2_A3_A4_A5_A6 + +LEAF_ENTRY Store_A1_A2_A3_A4_A5_A6_A7 + sd a1, 0(t3) + sd a2, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A3_A4_A5_A6_A7 + sd a3, 0(t3) + sd a4, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A5_A6_A7 + sd a5, 0(t3) + sd a6, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_A7 + sd a7, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_A1_A2_A3_A4_A5_A6_A7 + +// Float point load/store routines + +LEAF_ENTRY Load_FA0 + fld fa0, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0 + +LEAF_ENTRY Load_FA0_FA1 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1 + +LEAF_ENTRY Load_FA0_FA1_FA2 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2 + fld fa2, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2 + +LEAF_ENTRY Load_FA0_FA1_FA2_FA3 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2_FA3 + fld fa2, 0(t3) + fld fa3, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2_FA3 + +LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2_FA3_FA4 + fld fa2, 0(t3) + fld fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA4 + fld fa4, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + 
EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2_FA3_FA4 + +LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5 + fld fa2, 0(t3) + fld fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA4_FA5 + fld fa4, 0(t3) + fld fa5, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5 + +LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5_FA6 + fld fa2, 0(t3) + fld fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA4_FA5_FA6 + fld fa4, 0(t3) + fld fa5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA6 + fld fa6, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6 + +LEAF_ENTRY Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + fld fa0, 0(t3) + fld fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA2_FA3_FA4_FA5_FA6_FA7 + fld fa2, 0(t3) + fld fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA4_FA5_FA6_FA7 + fld fa4, 0(t3) + fld fa5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Load_FA6_FA7 + fld fa6, 0(t3) + fld fa7, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + +// Additional Load_FA* routines starting from FA1, FA3, FA5, FA7 +LEAF_ENTRY Load_FA1 + fld fa1, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1 + +LEAF_ENTRY Load_FA1_FA2 + fld fa1, 0(t3) + fld fa2, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2 + +LEAF_ENTRY Load_FA1_FA2_FA3 + fld fa1, 0(t3) + fld fa2, 8(t3) + fld fa3, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2_FA3 + +LEAF_ENTRY Load_FA1_FA2_FA3_FA4 + fld fa1, 0(t3) + fld fa2, 8(t3) + fld fa3, 16(t3) + fld fa4, 24(t3) + addi t3, t3, 32 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2_FA3_FA4 + +LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5 + fld fa1, 0(t3) + fld fa2, 8(t3) + fld fa3, 16(t3) + fld fa4, 24(t3) + fld fa5, 32(t3) + addi t3, t3, 40 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2_FA3_FA4_FA5 + +LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5_FA6 + fld fa1, 0(t3) + fld fa2, 8(t3) + fld fa3, 16(t3) + fld fa4, 24(t3) + fld fa5, 32(t3) + fld fa6, 40(t3) + addi t3, t3, 48 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2_FA3_FA4_FA5_FA6 + +LEAF_ENTRY Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + fld fa1, 0(t3) + fld fa2, 8(t3) + fld fa3, 16(t3) + fld fa4, 24(t3) + fld fa5, 32(t3) + fld fa6, 40(t3) + fld fa7, 48(t3) + addi t3, t3, 56 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + +LEAF_ENTRY Load_FA3 + fld fa3, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA3 + +LEAF_ENTRY Load_FA3_FA4 + fld fa3, 0(t3) + fld fa4, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA3_FA4 + +LEAF_ENTRY Load_FA3_FA4_FA5 + fld fa3, 0(t3) + fld fa4, 8(t3) + fld fa5, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA3_FA4_FA5 + +LEAF_ENTRY Load_FA3_FA4_FA5_FA6 + fld fa3, 0(t3) + fld fa4, 8(t3) + fld fa5, 16(t3) + fld fa6, 24(t3) + addi t3, t3, 32 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA3_FA4_FA5_FA6 + +LEAF_ENTRY 
Load_FA3_FA4_FA5_FA6_FA7 + fld fa3, 0(t3) + fld fa4, 8(t3) + fld fa5, 16(t3) + fld fa6, 24(t3) + fld fa7, 32(t3) + addi t3, t3, 40 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA3_FA4_FA5_FA6_FA7 + +LEAF_ENTRY Load_FA5 + fld fa5, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA5 + +LEAF_ENTRY Load_FA5_FA6 + fld fa5, 0(t3) + fld fa6, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA5_FA6 + +LEAF_ENTRY Load_FA5_FA6_FA7 + fld fa5, 0(t3) + fld fa6, 8(t3) + fld fa7, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA5_FA6_FA7 + +LEAF_ENTRY Load_FA7 + fld fa7, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Load_FA7 + +LEAF_ENTRY Store_FA0 + fsd fa0, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0 + +LEAF_ENTRY Store_FA0_FA1 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1 + +LEAF_ENTRY Store_FA0_FA1_FA2 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2 + fsd fa2, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2 + +LEAF_ENTRY Store_FA0_FA1_FA2_FA3 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2_FA3 + fsd fa2, 0(t3) + fsd fa3, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2_FA3 + +LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2_FA3_FA4 + fsd fa2, 0(t3) + fsd fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA4 + fsd fa4, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2_FA3_FA4 + +LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5 + fsd fa2, 0(t3) + fsd fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA4_FA5 + fsd fa4, 0(t3) + fsd fa5, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5 + +LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5_FA6 + fsd fa2, 0(t3) + fsd fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA4_FA5_FA6 + fsd fa4, 0(t3) + fsd fa5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA6 + fsd fa6, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6 + +LEAF_ENTRY Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + fsd fa0, 0(t3) + fsd fa1, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA2_FA3_FA4_FA5_FA6_FA7 + fsd fa2, 0(t3) + fsd fa3, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA4_FA5_FA6_FA7 + fsd fa4, 0(t3) + fsd fa5, 8(t3) + addi t3, t3, 16 +ALTERNATE_ENTRY Store_FA6_FA7 + fsd fa6, 0(t3) + fsd fa7, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA0_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + +// Additional Store_FA* routines starting from FA1, FA3, FA5, FA7 +LEAF_ENTRY Store_FA1 + fsd fa1, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1 + +LEAF_ENTRY Store_FA1_FA2 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 
+LEAF_END Store_FA1_FA2 + +LEAF_ENTRY Store_FA1_FA2_FA3 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + fsd fa3, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1_FA2_FA3 + +LEAF_ENTRY Store_FA1_FA2_FA3_FA4 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + fsd fa3, 16(t3) + fsd fa4, 24(t3) + addi t3, t3, 32 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1_FA2_FA3_FA4 + +LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + fsd fa3, 16(t3) + fsd fa4, 24(t3) + fsd fa5, 32(t3) + addi t3, t3, 40 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1_FA2_FA3_FA4_FA5 + +LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5_FA6 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + fsd fa3, 16(t3) + fsd fa4, 24(t3) + fsd fa5, 32(t3) + fsd fa6, 40(t3) + addi t3, t3, 48 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1_FA2_FA3_FA4_FA5_FA6 + +LEAF_ENTRY Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + fsd fa1, 0(t3) + fsd fa2, 8(t3) + fsd fa3, 16(t3) + fsd fa4, 24(t3) + fsd fa5, 32(t3) + fsd fa6, 40(t3) + fsd fa7, 48(t3) + addi t3, t3, 56 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA1_FA2_FA3_FA4_FA5_FA6_FA7 + +LEAF_ENTRY Store_FA3 + fsd fa3, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA3 + +LEAF_ENTRY Store_FA3_FA4 + fsd fa3, 0(t3) + fsd fa4, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA3_FA4 + +LEAF_ENTRY Store_FA3_FA4_FA5 + fsd fa3, 0(t3) + fsd fa4, 8(t3) + fsd fa5, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA3_FA4_FA5 + +LEAF_ENTRY Store_FA3_FA4_FA5_FA6 + fsd fa3, 0(t3) + fsd fa4, 8(t3) + fsd fa5, 16(t3) + fsd fa6, 24(t3) + addi t3, t3, 32 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA3_FA4_FA5_FA6 + +LEAF_ENTRY Store_FA3_FA4_FA5_FA6_FA7 + fsd fa3, 0(t3) + fsd fa4, 8(t3) + fsd fa5, 16(t3) + fsd fa6, 24(t3) + fsd fa7, 32(t3) + addi t3, t3, 40 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA3_FA4_FA5_FA6_FA7 + +LEAF_ENTRY Store_FA5 + fsd fa5, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA5 + +LEAF_ENTRY Store_FA5_FA6 + fsd fa5, 0(t3) + fsd fa6, 8(t3) + addi t3, t3, 16 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA5_FA6 + +LEAF_ENTRY Store_FA5_FA6_FA7 + fsd fa5, 0(t3) + fsd fa6, 8(t3) + fsd fa7, 16(t3) + addi t3, t3, 24 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA5_FA6_FA7 + +LEAF_ENTRY Store_FA7 + fsd fa7, 0(t3) + addi t3, t3, 8 + ld t4, 0(t2) + addi t2, t2, 8 + EPILOG_BRANCH_REG t4 +LEAF_END Store_FA7 + +#endif // FEATURE_INTERPRETER diff --git a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props index 969316103d3db2..fde29fabad3876 100644 --- a/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props +++ b/src/installer/pkg/sfx/Microsoft.NETCore.App/Directory.Build.props @@ -119,8 +119,8 @@ - - + + diff --git a/src/tests/JIT/interpreter/InterpreterTester.csproj b/src/tests/JIT/interpreter/InterpreterTester.csproj index ad3bb6e036fabe..65884cf1e65cd8 100644 --- a/src/tests/JIT/interpreter/InterpreterTester.csproj +++ b/src/tests/JIT/interpreter/InterpreterTester.csproj @@ -2,7 +2,7 @@ true true - true + true true
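Taken together, these stubs form a small threaded-code trampoline: CallJittedMethodRet* points t2 at the generated routine list and t3 at the interpreter's argument area, and every Load_*/Store_* routine finishes by loading the next routine pointer from the list and branching to it. The zero slots in GPRegsRoutines and FPRegsRoutines are the unreachable entries of an 8x8 first-register/last-register table. A hedged C++ model of the chaining (assumed names; plain function pointers stand in for PCODE, and the tail branch becomes a loop):

```
// Hedged model (assumed names, not runtime code) of the routine-list
// chaining used by the RISC-V stubs above: each routine consumes slots
// from the interpreter stack (t3 in asm) and control then passes to the
// next routine in the list (t2 in asm).
#include <cstdint>
#include <cstdio>

struct Sim
{
    const int64_t* interpStack;  // plays the role of t3
    int64_t a0 = 0, a1 = 0;      // simulated argument registers
};

using Routine = void (*)(Sim&);

static void Load_A0_sim(Sim& s) { s.a0 = *s.interpStack++; }
static void Load_A1_sim(Sim& s) { s.a1 = *s.interpStack++; }

int main()
{
    int64_t args[] = { 7, 35 };
    Routine routines[] = { Load_A0_sim, Load_A1_sim };  // like Load_A0_A1
    Sim s{ args };
    for (Routine r : routines)  // the asm chains via "ld t4, 0(t2); jr t4"
        r(s);
    std::printf("a0=%lld a1=%lld\n", (long long)s.a0, (long long)s.a1);
    return 0;
}
```

In the real stubs the routine list also carries inline operands (stack offsets and value-type sizes), which is why routines such as Load_Stack and Store_Stack_Ref advance t2 past the data they consume before loading the next routine pointer.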