diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 7d2f40deaf2440..efd0755e0fc3ef 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -653,6 +653,33 @@ LEAF_ENTRY Store_Stack, _TEXT jmp qword ptr [r11] LEAF_END Store_Stack, _TEXT +LEAF_ENTRY Load_Stack_Ref, _TEXT + mov esi, dword ptr [r11 + 8] ; SP offset + mov edi, dword ptr [r11 + 12] ; size of the value type + add rsi, 8; return address + add rsi, rsp + mov qword ptr [rsi], r10 + add r10, rdi + lea r10, [r10 + 7] + and r10, 0fffffffffffffff8h + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Stack_Ref, _TEXT + +LEAF_ENTRY Store_Stack_Ref, _TEXT + mov esi, dword ptr [r11 + 8] ; SP offset + mov ecx, dword ptr [r11 + 12] ; size of the value type + mov rsi, [rsp + rsi + 8 + __InterpreterStubArgumentRegistersOffset] + mov rdi, r10 + rep movsb + ; align rdi up to the stack slot size + lea rdi, [rdi + 7] + and rdi, 0fffffffffffffff8h + mov r10, rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Store_Stack_Ref, _TEXT + ; Routines for passing value type arguments by reference in general purpose registers RCX, RDX, R8, R9 ; from native code to the interpreter diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 080a98f0351017..af6ab66e9e81b4 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -889,7 +889,62 @@ LOCAL_LABEL(StoreCopyLoop): bne LOCAL_LABEL(StoreCopyLoop) ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 -LEAF_END Load_Stack +LEAF_END Store_Stack + +LEAF_ENTRY Load_Stack_Ref, _TEXT + ldr w11, [x10], #4 // SP offset + ldr w12, [x10], #4 // size of the value type + add x11, sp, x11 + str x9, [x11] + add x9, x9, x12 + // Align x9 to the stack slot size + add x9, x9, 7 + and x9, x9, 0xfffffffffffffff8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_Stack_Ref, _TEXT + + +.macro Copy_Ref argReg + cmp x11, #16 + blt LOCAL_LABEL(CopyBy8\argReg) +LOCAL_LABEL(RefCopyLoop16\argReg): + ldp x13, x14, [\argReg], #16 + stp x13, x14, [x9], #16 + subs x11, x11, #16 + bgt LOCAL_LABEL(RefCopyLoop16\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add x11, x11, #16 +LOCAL_LABEL(CopyBy8\argReg): + cmp x11, #8 + blt LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyLoop8\argReg): + ldr x13, [\argReg], #8 + str x13, [x9], #8 + subs x11, x11, #8 + bgt LOCAL_LABEL(RefCopyLoop8\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add x11, x11, #8 +LOCAL_LABEL(RefCopyLoop1\argReg): + ldrb w13, [\argReg], #1 + strb w13, [x9], #1 + subs x11, x11, #1 + bne LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyDone\argReg): + // Align x9 to the stack slot size + add x9, x9, 7 + and x9, x9, 0xfffffffffffffff8 +.endm + +LEAF_ENTRY Store_Stack_Ref, _TEXT + ldr w12, [x10], #4 // SP offset + ldr w11, [x10], #4 // size of the value type + add x12, sp, x12 + ldr x12, [x12, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + Copy_Ref x12 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Store_Stack_Ref, _TEXT #ifdef TARGET_APPLE @@ -932,34 +987,7 @@ LEAF_END Store_Stack_4B LEAF_ENTRY Store_Ref_\argReg ldr x11, [x10], #8 // size of the value type - cmp x11, #16 - blt LOCAL_LABEL(CopyBy8\argReg) -LOCAL_LABEL(RefCopyLoop16\argReg): - ldp x13, x14, [\argReg], #16 - stp x13, x14, [x9], #16 - subs x11, x11, #16 - bgt LOCAL_LABEL(RefCopyLoop16\argReg) - beq LOCAL_LABEL(RefCopyDone\argReg) - add x11, x11, #16 -LOCAL_LABEL(CopyBy8\argReg): - cmp x11, #8 - blt LOCAL_LABEL(RefCopyLoop1\argReg) -LOCAL_LABEL(RefCopyLoop8\argReg): - ldr x13, [\argReg], #8 - str x13, [x9], #8 - subs x11, x11, #8 - bgt LOCAL_LABEL(RefCopyLoop8\argReg) - beq LOCAL_LABEL(RefCopyDone\argReg) - add x11, x11, #8 -LOCAL_LABEL(RefCopyLoop1\argReg): - ldrb w13, [\argReg], #1 - strb w13, [x9], #1 - subs x11, x11, #1 - bne LOCAL_LABEL(RefCopyLoop1\argReg) -LOCAL_LABEL(RefCopyDone\argReg): - // Align x9 to the stack slot size - add x9, x9, 7 - and x9, x9, 0xfffffffffffffff8 + Copy_Ref \argReg ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Store_Ref_\argReg diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 44371193e3b85e..2eed729107339f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1247,13 +1247,23 @@ StoreCopyLoop bne StoreCopyLoop ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 - LEAF_END Load_Stack + LEAF_END Store_Stack - MACRO - Store_Ref $argReg + LEAF_ENTRY Load_Stack_Ref + ldr w11, [x10], #4 ; SP offset + ldr w12, [x10], #4 ; size of the value type + add x11, sp, x11 + str x9, [x11] + add x9, x9, x12 + ; Align x9 to the stack slot size + add x9, x9, 7 + and x9, x9, 0xfffffffffffffff8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_Stack_Ref - LEAF_ENTRY Store_Ref_$argReg - ldr x11, [x10], #8 ; size of the value type + MACRO + Copy_Ref $argReg cmp x11, #16 blt CopyBy8$argReg RefCopyLoop16$argReg @@ -1282,6 +1292,24 @@ RefCopyDone$argReg ; Align x9 to the stack slot size add x9, x9, 7 and x9, x9, 0xfffffffffffffff8 + MEND + + LEAF_ENTRY Store_Stack_Ref + ldr w12, [x10], #4 ; SP offset + ldr w11, [x10], #4 ; size of the value type + add x12, sp, x12 + ldr x12, [x12, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + Copy_Ref x12 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Store_Stack_Ref + + MACRO + Store_Ref $argReg + + LEAF_ENTRY Store_Ref_$argReg + ldr x11, [x10], #8 ; size of the value type + Copy_Ref $argReg ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Store_Ref_$argReg diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 3cdfe7c68db14d..bfc3931e0e0474 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -19,6 +19,11 @@ extern "C" void Store_Stack_2B(); extern "C" void Store_Stack_4B(); #endif // TARGET_APPLE && TARGET_ARM64 +#ifndef UNIX_AMD64_ABI +extern "C" void Load_Stack_Ref(); +extern "C" void Store_Stack_Ref(); +#endif // !UNIX_AMD64_ABI + #ifdef TARGET_AMD64 #ifdef TARGET_WINDOWS @@ -1029,6 +1034,14 @@ PCODE CallStubGenerator::GetGPRegRefRoutine(int r) return m_interpreterToNative ? GPRegsRefRoutines[r] : GPRegsRefStoreRoutines[r]; } +PCODE CallStubGenerator::GetStackRefRoutine() +{ +#if LOG_COMPUTE_CALL_STUB + printf("GetStackRefRoutine\n"); +#endif + return m_interpreterToNative ? (PCODE)Load_Stack_Ref : (PCODE)Store_Stack_Ref; +} + #endif // UNIX_AMD64_ABI PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) @@ -1413,6 +1426,7 @@ void CallStubGenerator::ComputeCallStub(MetaSig &sig, PCODE *pRoutines) // The return buffer on Windows AMD64 is passed in the first argument register, so the // "this" argument is be passed in the second argument register. m_r1 = 1; + m_r2 = 1; } else #endif // TARGET_WINDOWS && TARGET_AMD64 @@ -1628,7 +1642,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD m_s1 = argLocDesc.m_byteStackIndex; m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; } - else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8)) + else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8) && (!pArgIt || !pArgIt->IsArgPassedByRef())) { // Extend an existing range, but only if the argument is at least pointer size large. // The only case when this is not true is on Apple ARM64 OSes where primitive type smaller @@ -1677,10 +1691,20 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD // we always process single argument passed by reference using single routine. if (pArgIt != NULL && pArgIt->IsArgPassedByRef()) { - _ASSERTE(argLocDesc.m_cGenReg == 1); - pRoutines[m_routineIndex++] = GetGPRegRefRoutine(argLocDesc.m_idxGenReg); - pRoutines[m_routineIndex++] = pArgIt->GetArgSize(); - m_r1 = NoRange; + if (argLocDesc.m_cGenReg == 1) + { + pRoutines[m_routineIndex++] = GetGPRegRefRoutine(argLocDesc.m_idxGenReg); + pRoutines[m_routineIndex++] = pArgIt->GetArgSize(); + m_r1 = NoRange; + } + else + { + _ASSERTE(argLocDesc.m_byteStackIndex != -1); + pRoutines[m_routineIndex++] = GetStackRefRoutine(); + pRoutines[m_routineIndex++] = ((int64_t)pArgIt->GetArgSize() << 32) | argLocDesc.m_byteStackIndex; + m_totalStackSize += argLocDesc.m_byteStackSize; + m_s1 = NoRange; + } } #endif // UNIX_AMD64_ABI } diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index ee15fbcdbe4ca9..9f628f6ac08024 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -109,6 +109,7 @@ class CallStubGenerator #ifndef UNIX_AMD64_ABI PCODE GetGPRegRefRoutine(int r); + PCODE GetStackRefRoutine(); #endif // !UNIX_AMD64_ABI PCODE GetStackRoutine(); #if defined(TARGET_APPLE) && defined(TARGET_ARM64) diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs index 6930cbae213e52..4c7e31b2cc9ce2 100644 --- a/src/tests/JIT/interpreter/Interpreter.cs +++ b/src/tests/JIT/interpreter/Interpreter.cs @@ -605,6 +605,51 @@ static void TestCallingConvention12JitToInterpreter(bool init) } } + [MethodImpl(MethodImplOptions.NoInlining)] + static void TestCallingConvention13(int a, int b, int c, int d, int e, int f, int g, int h, TestStruct4ii s) + { + Console.WriteLine("TestCallingConvention13: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}, g = {6}, h = {7}, s = ({8}, {9}, {10}, {11})", a, b, c, d, e, f, g, h, s.a, s.b, s.c, s.d); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void TestCallingConvention13Rev(int a, int b, int c, int d, int e, int f, int g, int h, TestStruct4ii s) + { + Console.Write("TestCallingConvention13Rev: a = "); + Console.Write(a); + Console.Write(", b = "); + Console.Write(b); + Console.Write(", c = "); + Console.Write(c); + Console.Write(", d = "); + Console.Write(d); + Console.Write(", e = "); + Console.Write(e); + Console.Write(", f = "); + Console.Write(f); + Console.Write(", g = "); + Console.Write(g); + Console.Write(", h = "); + Console.Write(h); + Console.Write(", s = ("); + Console.Write(s.a); + Console.Write(", "); + Console.Write(s.b); + Console.Write(", "); + Console.Write(s.c); + Console.Write(", "); + Console.Write(s.d); + Console.WriteLine(")"); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void TestCallingConvention13JitToInterpreter(bool init) + { + if (!init) + { + TestCallingConvention13Rev(1, 2, 3, 4, 5, 6, 7, 8, new TestStruct4ii { a = 9, b = 10, c = 11, d = 12 }); + } + } + // This method is invoked before we start interpretting anything, so the methods invoked in it will be jitted. // This is necessary for the calling convention tests that test calls from the interpreter to the JITted code // to actually test things. @@ -655,6 +700,7 @@ static void EnsureCallingConventionTestTargetMethodsAreJitted() Console.WriteLine(s11.c); TestCallingConvention12(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12); + TestCallingConvention13(1, 2, 3, 4, 5, 6, 7, 8, new TestStruct4ii { a = 9, b = 10, c = 11, d = 12 }); TestCallingConvention0JitToInterpreter(true); TestCallingConvention1JitToInterpreter(true); @@ -669,6 +715,7 @@ static void EnsureCallingConventionTestTargetMethodsAreJitted() TestCallingConvention10JitToInterpreter(true); TestCallingConvention11JitToInterpreter(true); TestCallingConvention12JitToInterpreter(true); + TestCallingConvention13JitToInterpreter(true); } static int Main(string[] args) @@ -698,6 +745,7 @@ public static void RunInterpreterTests() TestCallingConvention10JitToInterpreter(false); TestCallingConvention11JitToInterpreter(false); TestCallingConvention12JitToInterpreter(false); + TestCallingConvention13JitToInterpreter(false); TestCallingConvention0(1, 2.0f, 3, 4.0, 5, 6.0); @@ -779,6 +827,7 @@ public static void RunInterpreterTests() Console.WriteLine(s11.c); TestCallingConvention12(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12); + TestCallingConvention13(1, 2, 3, 4, 5, 6, 7, 8, new TestStruct4ii { a = 9, b = 10, c = 11, d = 12 }); // Console.WriteLine("Run interp tests"); Console.WriteLine("Sum");