From 71374946a742e366968ed0ae5d1e9246f7b46fc5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 31 Jul 2021 16:11:31 +0200 Subject: [PATCH 01/22] Support fast tailcalls in R2R Partially addresses #5857 --- src/coreclr/jit/codegencommon.cpp | 37 ++++++++++++-- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/lowerxarch.cpp | 48 ++++++++++++------- src/coreclr/jit/lsraxarch.cpp | 5 +- .../tools/Common/JitInterface/CorInfoImpl.cs | 2 +- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 8 +--- 6 files changed, 69 insertions(+), 33 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index dcf1768c60a7dd..bf870d25d6d3de 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8483,10 +8483,39 @@ void CodeGen::genFnEpilog(BasicBlock* block) } else { - // Target requires indirection to obtain. genCallInstruction will have materialized - // it into RAX already, so just jump to it. The stack walker requires that a register - // indirect tail call be rex.w prefixed. - GetEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX); + GenTree* target = callType == CT_INDIRECT ? call->gtCallAddr : call->gtControlExpr; + if (target->isContained()) + { + // We can only tailcall with indirect target if the target + // is an offset as otherwise we may need registers that + // could have been wiped out by the epilog. + noway_assert(target->isIndir()); + assert(target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()); + assert(!target->AsIndir()->HasIndex()); + assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); + + GetEmitter()->emitIns_Call( + emitter::EC_FUNC_TOKEN_INDIR, + call->gtCallMethHnd, + INDEBUG_LDISASM_COMMA(nullptr) + (void*)target->AsIndir()->Base()->AsIntConCommon()->IconValue(), + 0, + EA_UNKNOWN + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, + true /* isJump */ + ); + } + else + { + // Target requires indirection to obtain. genCallInstruction will have materialized + // it into RAX already, so just jump to it. The stack walker requires that a register + // indirect tail call be rex.w prefixed. + GetEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX); + } } #else diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 529d1fe948799c..959e06e8b9ffb8 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5191,7 +5191,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // If this is indirect then we go through RAX with epilog sequence // generating "jmp rax". Otherwise epilog will try to generate a // rip-relative jump. - if (target != nullptr) + if (target != nullptr && !target->isContained()) { genConsumeReg(target); genCopyRegIfNeeded(target, REG_RAX); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 43c0df62042365..92dcae26cfbe50 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4466,26 +4466,40 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) // we should never see a gtControlExpr whose type is void. assert(ctrlExpr->TypeGet() != TYP_VOID); - // In case of fast tail implemented as jmp, make sure that gtControlExpr is - // computed into a register. 
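// (Editorial aside, not part of the patch: the payoff of the containment change below is in the
// epilog change earlier in this patch. A contained constant-address target lets the epilog emit a
// single memory-indirect jump through that address, roughly
//     jmp qword ptr [<target address>]
// instead of requiring the target to be computed into RAX up front and jumping via
//     rex.w jmp rax
// Containment is only legal when computing the address needs no registers at all, since the
// epilog has already restored or killed registers by the time the jump is emitted.)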
- if (!call->IsFastTailCall()) - { + // In case of fast tail implemented as jmp it can be problematic to mark the control expression as contained + // since it may rely on registers that have been cleaned up. The exception is indirections off of constants + // that don't need any registers. #ifdef TARGET_X86 - // On x86, we need to generate a very specific pattern for indirect VSD calls: - // - // 3-byte nop - // call dword ptr [eax] - // - // Where EAX is also used as an argument to the stub dispatch helper. Make - // sure that the call target address is computed into EAX in this case. - if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. + if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + assert(ctrlExpr->isIndir()); + MakeSrcContained(call, ctrlExpr); + } + else +#endif // TARGET_X86 + + + if (ctrlExpr->isIndir()) + { + bool canContainIndir = true; + if (call->IsFastTailCall()) { - assert(ctrlExpr->isIndir()); - MakeSrcContained(call, ctrlExpr); + // Currently we only allow fast tailcalls with indirections when no registers are required in the indirection. + // This is to ensure we won't need a register for the addressing mode since registers will have been cleaned up + // by the epilog at this point. + canContainIndir = + ctrlExpr->AsIndir()->HasBase() && ctrlExpr->AsIndir()->Base()->isContainedIntOrIImmed() && + !ctrlExpr->AsIndir()->HasIndex(); } - else -#endif // TARGET_X86 - if (ctrlExpr->isIndir()) + + if (canContainIndir) { // We may have cases where we have set a register target on the ctrlExpr, but if it // contained we must clear it. diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 854e4521ec9001..9d02e398ad7698 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1214,9 +1214,8 @@ int LinearScan::BuildCall(GenTreeCall* call) // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into a register. - if (call->IsFastTailCall()) + if (call->IsFastTailCall() && !ctrlExpr->isContained()) { - assert(!ctrlExpr->isContained()); // Fast tail call - make sure that call target is always computed in RAX // so that epilog sequence can generate "jmp rax" to achieve fast tail call. ctrlExprCandidates = RBM_RAX; @@ -1239,7 +1238,7 @@ int LinearScan::BuildCall(GenTreeCall* call) #if FEATURE_VARARG // If it is a fast tail call, it is already preferenced to use RAX. // Therefore, no need set src candidates on call tgt again. 
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall()) + if (call->IsVarargs() && callHasFloatRegArgs && (ctrlExprCandidates == RBM_NONE)) { // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index ff268b787e440d..02549319752d1d 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -3205,7 +3205,7 @@ private bool getTailCallHelpers(ref CORINFO_RESOLVED_TOKEN callToken, CORINFO_SI // Slow tailcalls are not supported yet // https://github.com/dotnet/runtime/issues/35423 #if READYTORUN - throw new NotImplementedException(nameof(getTailCallHelpers)); + throw new RequiresRuntimeJitException(nameof(getTailCallHelpers)); #else return false; #endif diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 6a325a5844c816..f3701699cca617 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -915,13 +915,7 @@ private void getFunctionEntryPoint(CORINFO_METHOD_STRUCT_* ftn, ref CORINFO_CONS private bool canTailCall(CORINFO_METHOD_STRUCT_* callerHnd, CORINFO_METHOD_STRUCT_* declaredCalleeHnd, CORINFO_METHOD_STRUCT_* exactCalleeHnd, bool fIsTailPrefix) { - if (fIsTailPrefix) - { - // FUTURE: Delay load fixups for tailcalls - throw new RequiresRuntimeJitException(nameof(fIsTailPrefix)); - } - - return false; + return true; } private MethodWithToken ComputeMethodWithToken(MethodDesc method, ref CORINFO_RESOLVED_TOKEN pResolvedToken, TypeDesc constrainedType, bool unboxing) From 0efbbc46cb24cba23b78ccc6c5e7d7b746cba040 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 31 Jul 2021 20:26:11 +0200 Subject: [PATCH 02/22] Support ARM64 --- src/coreclr/jit/codegenarmarch.cpp | 7 +++++++ src/coreclr/jit/codegencommon.cpp | 12 +++++++----- src/coreclr/jit/lowerxarch.cpp | 1 - 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 1953f93e076b9a..dad471137abe33 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -2364,6 +2364,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Use IP0 on ARM64 and R12 on ARM32 as the call target register. inst_Mov(TYP_I_IMPL, REG_FASTTAILCALL_TARGET, target->GetRegNum(), /* canSkip */ true); } + else if (call->IsR2RRelativeIndir()) + { + assert(call->gtEntryPoint.accessType == IAT_PVALUE); + assert(call->gtControlExpr == nullptr); + // Call target is in REG_R2R_INDIRECT_PARAM. genFnEpilog knows to look for it there + // instead of from REG_FASTTAILCALL_TARGET. + } return; } diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index bf870d25d6d3de..c59bed2d2bc51d 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8025,7 +8025,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) // target address will be in gtDirectCallAddress. It is still possible that calls // to user funcs require indirection, in which case the control expression will // be non-null. 
- if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr)) + if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr) && !call->IsR2RRelativeIndir()) { assert(call->gtCallMethHnd != nullptr); // clang-format off @@ -8050,8 +8050,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) else { // Target requires indirection to obtain. genCallInstruction will have materialized - // it into REG_FASTTAILCALL_TARGET already, so just branch to it. - GetEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); + // it into REG_R2R_INDIRECT_PARAM/REG_FASTTAILCALL_TARGET already, so just branch to it. + regNumber reg = call->IsR2RRelativeIndir() ? REG_R2R_INDIRECT_PARAM : REG_FASTTAILCALL_TARGET; + GetEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), reg); } } #endif // FEATURE_FASTTAILCALL @@ -8487,8 +8488,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) if (target->isContained()) { // We can only tailcall with indirect target if the target - // is an offset as otherwise we may need registers that - // could have been wiped out by the epilog. + // is an indirs off of an immediate as otherwise we may + // need registers that could have been wiped out by the + // epilog. noway_assert(target->isIndir()); assert(target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()); assert(!target->AsIndir()->HasIndex()); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 92dcae26cfbe50..34c8681f3a4379 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4485,7 +4485,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) else #endif // TARGET_X86 - if (ctrlExpr->isIndir()) { bool canContainIndir = true; From 5b0127e46c0be7b51298ed93fd206ea52ddba114 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 31 Jul 2021 20:35:35 +0200 Subject: [PATCH 03/22] Run jit-format --- src/coreclr/jit/codegencommon.cpp | 2 ++ src/coreclr/jit/lowerxarch.cpp | 17 ++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index c59bed2d2bc51d..efae9668d89e6f 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8496,6 +8496,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) assert(!target->AsIndir()->HasIndex()); assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler)); + // clang-format off GetEmitter()->emitIns_Call( emitter::EC_FUNC_TOKEN_INDIR, call->gtCallMethHnd, @@ -8510,6 +8511,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, true /* isJump */ ); + // clang-format on } else { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 34c8681f3a4379..ace9d543881a94 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4466,9 +4466,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) // we should never see a gtControlExpr whose type is void. assert(ctrlExpr->TypeGet() != TYP_VOID); - // In case of fast tail implemented as jmp it can be problematic to mark the control expression as contained - // since it may rely on registers that have been cleaned up. The exception is indirections off of constants - // that don't need any registers. 
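Editorial aside: a standalone sketch of the rule described in the comment above, using invented
types rather than the JIT's GenTree nodes (an illustration only, not the JIT's implementation):

    // A fast-tailcall target indirection may only be contained when computing its
    // address needs no registers, because the epilog has already restored or wiped
    // them: a contained constant base and no index register.
    struct AddrMode
    {
        bool hasBase;
        bool baseIsConstantImm;
        bool hasIndex;
    };

    bool CanContainFastTailCallTarget(const AddrMode& addr)
    {
        return addr.hasBase && addr.baseIsConstantImm && !addr.hasIndex;
    }

This mirrors the canContainIndir check in ContainCheckCallOperands.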
#ifdef TARGET_X86 // On x86, we need to generate a very specific pattern for indirect VSD calls: // @@ -4485,17 +4482,19 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) else #endif // TARGET_X86 - if (ctrlExpr->isIndir()) + if (ctrlExpr->isIndir()) { bool canContainIndir = true; if (call->IsFastTailCall()) { - // Currently we only allow fast tailcalls with indirections when no registers are required in the indirection. - // This is to ensure we won't need a register for the addressing mode since registers will have been cleaned up + // Currently we only allow fast tailcalls with indirections when no registers are required in the + // indirection. + // This is to ensure we won't need a register for the addressing mode since registers will have been + // cleaned up // by the epilog at this point. - canContainIndir = - ctrlExpr->AsIndir()->HasBase() && ctrlExpr->AsIndir()->Base()->isContainedIntOrIImmed() && - !ctrlExpr->AsIndir()->HasIndex(); + canContainIndir = ctrlExpr->AsIndir()->HasBase() && + ctrlExpr->AsIndir()->Base()->isContainedIntOrIImmed() && + !ctrlExpr->AsIndir()->HasIndex(); } if (canContainIndir) From 2b360c8217399bca0edcec72df74852f1e0d8dec Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 31 Jul 2021 20:52:21 +0200 Subject: [PATCH 04/22] Fix non-R2R ARM build --- src/coreclr/jit/codegenarmarch.cpp | 2 ++ src/coreclr/jit/codegencommon.cpp | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index dad471137abe33..717a93ddbc9618 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -2364,6 +2364,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Use IP0 on ARM64 and R12 on ARM32 as the call target register. inst_Mov(TYP_I_IMPL, REG_FASTTAILCALL_TARGET, target->GetRegNum(), /* canSkip */ true); } +#ifdef FEATURE_READYTORUN_COMPILER else if (call->IsR2RRelativeIndir()) { assert(call->gtEntryPoint.accessType == IAT_PVALUE); @@ -2371,6 +2372,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Call target is in REG_R2R_INDIRECT_PARAM. genFnEpilog knows to look for it there // instead of from REG_FASTTAILCALL_TARGET. } +#endif return; } diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index efae9668d89e6f..ad6de4ef10250c 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8025,7 +8025,12 @@ void CodeGen::genFnEpilog(BasicBlock* block) // target address will be in gtDirectCallAddress. It is still possible that calls // to user funcs require indirection, in which case the control expression will // be non-null. - if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr) && !call->IsR2RRelativeIndir()) + bool isR2rRelIndir = false; +#ifdef FEATURE_READYTORUN_COMPILER + isR2rRelIndir = call->IsR2RRelativeIndir(); +#endif + + if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr) && !isR2rRelIndir) { assert(call->gtCallMethHnd != nullptr); // clang-format off @@ -8051,7 +8056,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) { // Target requires indirection to obtain. genCallInstruction will have materialized // it into REG_R2R_INDIRECT_PARAM/REG_FASTTAILCALL_TARGET already, so just branch to it. - regNumber reg = call->IsR2RRelativeIndir() ? REG_R2R_INDIRECT_PARAM : REG_FASTTAILCALL_TARGET; + regNumber reg = isR2rRelIndir ? 
REG_R2R_INDIRECT_PARAM : REG_FASTTAILCALL_TARGET; GetEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), reg); } } From ae8fd46a49f243aac21412291d9a835b2092693e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 31 Jul 2021 22:40:31 +0200 Subject: [PATCH 05/22] Fix recursive-call-to-loop optimization with non-standard args --- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/morph.cpp | 71 +++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 06f3763f30ba56..a786484c42a2fe 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6057,9 +6057,11 @@ class Compiler GenTree* fgMorphPotentialTailCall(GenTreeCall* call); GenTree* fgGetStubAddrArg(GenTreeCall* call); + unsigned fgGetArgTabEntryParameterLclNum(fgArgTabEntry* argTabEntry); void fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall); Statement* fgAssignRecursiveCallArgToCallerParam(GenTree* arg, fgArgTabEntry* argTabEntry, + unsigned lclParamNum, BasicBlock* block, IL_OFFSETX callILOffset, Statement* tmpAssignmentInsertionPoint, diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cb04806feebbbc..2070f4ae975905 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8834,6 +8834,35 @@ GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) return stubAddrArg; } +//------------------------------------------------------------------------------ +// fgGetArgTabEntryParameterLclNum : Get the lcl num for the parameter that +// corresponds to the argument to a recursive call. +// +// Notes: +// Due to non-standard args this is not just fgArgTabEntry::argNum. +// For example, in R2R compilations we will have added a non-standard +// arg for the R2R indirection cell. +// +// Arguments: +// argTabEntry - the arg +// +unsigned fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEntry* argTabEntry) +{ + fgArgInfo* argInfo = call->fgArgInfo; + unsigned argCount = argInfo->ArgCount(); + fgArgTabEntry** argTable = argInfo->ArgTable(); + + unsigned numNonStandardBefore = 0; + for (unsigned i = 0; i < argCount; i++) + { + fgArgTabEntry* arg = argTable[i]; + if (arg->isNonStandard && (arg->argNum < argTabEntry->argNum)) + numNonStandardBefore++; + } + + return argTabEntry->argNum - numNonStandardBefore; +} + //------------------------------------------------------------------------------ // fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop. // @@ -8925,13 +8954,17 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa { // This is an actual argument that needs to be assigned to the corresponding caller parameter. fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); - Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); - if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + if (!curArgTabEntry->isNonStandard) { - // All temp assignments will happen before the first param assignment. 
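Editorial aside: fgGetArgTabEntryParameterLclNum, introduced above in this patch, exists because
JIT-inserted non-standard args shift an argument's position in the arg table away from the caller
parameter's local number. Below is a standalone, runnable sketch of the mapping, with invented
types and an assumed argument layout rather than the JIT's real data structures:

    #include <cassert>
    #include <vector>

    struct ArgEntry
    {
        unsigned argNum;        // position in the call's argument table
        bool     isNonStandard; // true for JIT-inserted args such as an R2R indirection cell
    };

    // Parameter local number = the arg's position minus the number of
    // non-standard args that precede it.
    unsigned ParameterLclNum(const std::vector<ArgEntry>& args, const ArgEntry& arg)
    {
        unsigned numNonStandardBefore = 0;
        for (const ArgEntry& a : args)
        {
            if (a.isNonStandard && (a.argNum < arg.argNum))
            {
                numNonStandardBefore++;
            }
        }
        return arg.argNum - numNonStandardBefore;
    }

    int main()
    {
        // Assumed layout: arg 0 is an R2R indirection cell (non-standard); args 1 and 2 are the IL arguments.
        std::vector<ArgEntry> args = {{0, true}, {1, false}, {2, false}};
        assert(ParameterLclNum(args, args[1]) == 0); // first IL argument maps to parameter local 0
        assert(ParameterLclNum(args, args[2]) == 1); // second IL argument maps to parameter local 1
        return 0;
    }

Without this remapping the recursive-tailcall-to-loop rewrite would assign incoming values to the
wrong caller parameters whenever a non-standard arg had been prepended, which is what this commit
fixes.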
- tmpAssignmentInsertionPoint = paramAssignStmt; + Statement* paramAssignStmt = + fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, fgGetArgTabEntryParameterLclNum(recursiveTailCall, curArgTabEntry), + block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } } } } @@ -8945,14 +8978,17 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. GenTree* lateArg = use.GetNode(); fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); - Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); - - if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + if (!curArgTabEntry->isNonStandard) { - // All temp assignments will happen before the first param assignment. - tmpAssignmentInsertionPoint = paramAssignStmt; + Statement* paramAssignStmt = + fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, fgGetArgTabEntryParameterLclNum(recursiveTailCall, curArgTabEntry), block, callILOffset, + tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + + if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) + { + // All temp assignments will happen before the first param assignment. + tmpAssignmentInsertionPoint = paramAssignStmt; + } } lateArgIndex++; } @@ -9048,6 +9084,7 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // Arguments: // arg - argument to assign // argTabEntry - argument table entry corresponding to arg +// lclParamNum - the lcl num of the parameter // block --- basic block the call is in // callILOffset - IL offset of the call // tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary) @@ -9058,6 +9095,7 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, fgArgTabEntry* argTabEntry, + unsigned lclParamNum, BasicBlock* block, IL_OFFSETX callILOffset, Statement* tmpAssignmentInsertionPoint, @@ -9067,7 +9105,6 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, // some argument trees may reference parameters directly. GenTree* argInTemp = nullptr; - unsigned originalArgNum = argTabEntry->argNum; bool needToAssignParameter = true; // TODO-CQ: enable calls with struct arguments passed in registers. @@ -9087,7 +9124,7 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, // The argument is a non-parameter local so it doesn't need to be assigned to a temp. argInTemp = arg; } - else if (lclNum == originalArgNum) + else if (lclNum == lclParamNum) { // The argument is the same parameter local that we were about to assign so // we can skip the assignment. @@ -9118,9 +9155,9 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, } // Now assign the temp to the parameter. 
- LclVarDsc* paramDsc = lvaTable + originalArgNum; + LclVarDsc* paramDsc = lvaTable + lclParamNum; assert(paramDsc->lvIsParam); - GenTree* paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType); + GenTree* paramDest = gtNewLclvNode(lclParamNum, paramDsc->lvType); GenTree* paramAssignNode = gtNewAssignNode(paramDest, argInTemp); paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset); From e45f0c59ba897d26cd86f86d873516b889d55ded Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 1 Aug 2021 17:56:33 +0200 Subject: [PATCH 06/22] Implement new delay load helper for fast tailcalls --- .../superpmi-shim-collector/icorjitinfo.cpp | 5 ++ .../superpmi-shim-counter/icorjitinfo.cpp | 7 ++ .../superpmi-shim-simple/icorjitinfo.cpp | 6 ++ .../ToolBox/superpmi/superpmi/icorjitinfo.cpp | 5 ++ src/coreclr/inc/corinfo.h | 7 ++ src/coreclr/inc/icorjitinfoimpl_generated.h | 3 + src/coreclr/inc/jiteeversionguid.h | 11 +-- src/coreclr/jit/ICorJitInfo_API_names.h | 1 + src/coreclr/jit/ICorJitInfo_API_wrapper.hpp | 8 +++ src/coreclr/jit/codegenarmarch.cpp | 5 +- src/coreclr/jit/codegencommon.cpp | 63 ++++++++++------ src/coreclr/jit/codegenxarch.cpp | 6 ++ src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.h | 6 +- src/coreclr/jit/lower.cpp | 14 ++-- src/coreclr/jit/lowerxarch.cpp | 17 +++-- src/coreclr/jit/morph.cpp | 71 +++++++++++++++---- .../tools/Common/JitInterface/CorInfoBase.cs | 49 ++++++++----- .../ThunkGenerator/ThunkInput.txt | 1 + .../ReadyToRun/DelayLoadHelperImport.cs | 15 +++- .../ReadyToRun/DelayLoadHelperMethodImport.cs | 2 +- .../ReadyToRun/DelayLoadMethodImport.cs | 12 ++-- .../ReadyToRun/ImportThunk.cs | 9 ++- .../ReadyToRun/MethodFixupSignature.cs | 13 ++-- .../ReadyToRun/Target_ARM/ImportThunk.cs | 1 + .../ReadyToRun/Target_ARM64/ImportThunk.cs | 1 + .../ReadyToRun/Target_X64/ImportThunk.cs | 14 ++++ .../ReadyToRun/Target_X86/ImportThunk.cs | 3 + .../ReadyToRunCodegenNodeFactory.cs | 33 +++++---- .../ReadyToRunSymbolNodeFactory.cs | 6 +- .../DependencyAnalysis/TypeAndMethod.cs | 10 ++- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 25 ++++++- .../tools/aot/jitinterface/jitinterface.h | 9 +++ src/coreclr/vm/jitinterface.cpp | 5 ++ 34 files changed, 336 insertions(+), 109 deletions(-) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp index 4390113cf02798..63fedc2b9da69c 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1776,6 +1776,11 @@ bool interceptor_ICJI::getTailCallHelpers( return result; } +void interceptor_ICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) +{ + mc->cr->AddCall("updateEntryPointForTailCall"); +} + // Stuff directly on ICorJitInfo // Returns extended flags for a particular compilation instance. 
diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp index 445cc2a490e5ba..297155fccc5c6b 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp @@ -1250,6 +1250,13 @@ bool interceptor_ICJI::notifyInstructionSetUsage( return original_ICorJitInfo->notifyInstructionSetUsage(instructionSet, supportEnabled); } +void interceptor_ICJI::updateEntryPointForTailCall( + CORINFO_CONST_LOOKUP* entryPoint) +{ + mcs->AddCall("updateEntryPointForTailCall"); + original_ICorJitInfo->updateEntryPointForTailCall(entryPoint); +} + void interceptor_ICJI::allocMem( AllocMemArgs* pArgs) { diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp index 7b04e139c4d016..a9e4890065aee9 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp @@ -1094,6 +1094,12 @@ bool interceptor_ICJI::notifyInstructionSetUsage( return original_ICorJitInfo->notifyInstructionSetUsage(instructionSet, supportEnabled); } +void interceptor_ICJI::updateEntryPointForTailCall( + CORINFO_CONST_LOOKUP* entryPoint) +{ + original_ICorJitInfo->updateEntryPointForTailCall(entryPoint); +} + void interceptor_ICJI::allocMem( AllocMemArgs* pArgs) { diff --git a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp index b2cb35339adcc7..cfbc852216e485 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp @@ -1556,6 +1556,11 @@ bool MyICJI::notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bo return supported; } +void MyICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) +{ + jitInstance->mc->cr->AddCall("updateEntryPointForTailCall"); +} + // Stuff directly on ICorJitInfo // Returns extended flags for a particular compilation instance. diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index a0093501b561bc..075cf403a155ea 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -3173,6 +3173,13 @@ class ICorDynamicInfo : public ICorStaticInfo CORINFO_InstructionSet instructionSet, bool supportEnabled ) = 0; + + // Notify EE that JIT needs an entry-point that is tail-callable. + // This is used for AOT on x64 to support delay loaded fast tailcalls. + // Normally the indirection cell is retrieved from the return address, + // but for tailcalls, the contract is that JIT leaves the indirection cell in + // a register during tailcall. 
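// (Editorial aside, not part of the patch: concretely, on x64 a fast tailcall through an R2R
// import is expected to look roughly like
//     mov rax, <address of indirection cell>
//     jmp qword ptr [rax]
// so the delay-load thunk reached through the cell can find the cell in RAX, whereas a normal
//     call qword ptr [cell]
// lets the helper recover the cell from the return address. updateEntryPointForTailCall gives
// the host a chance to hand back an entry point whose thunk honors that register convention.)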
+ virtual void updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) = 0; }; /**********************************************************************************/ diff --git a/src/coreclr/inc/icorjitinfoimpl_generated.h b/src/coreclr/inc/icorjitinfoimpl_generated.h index b3b77ece974a87..2f9d502e3bce51 100644 --- a/src/coreclr/inc/icorjitinfoimpl_generated.h +++ b/src/coreclr/inc/icorjitinfoimpl_generated.h @@ -635,6 +635,9 @@ bool notifyInstructionSetUsage( CORINFO_InstructionSet instructionSet, bool supportEnabled) override; +void updateEntryPointForTailCall( + CORINFO_CONST_LOOKUP* entryPoint) override; + void allocMem( AllocMemArgs* pArgs) override; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index f2d6921bc76cd7..35e0fa641eaabb 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,12 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* c2e4a466-795d-4e64-a776-0ae7b2eed65b */ - 0xc2e4a466, - 0x795d, - 0x4e64, - {0xa7, 0x76, 0x0a, 0xe7, 0xb2, 0xee, 0xd6, 0x5b} +constexpr GUID JITEEVersionIdentifier = { /* 7374274c-5cb5-4c41-872e-01f438ac1548 */ + + 0x7374274c, + 0x5cb5, + 0x4c41, + { 0x87, 0x2e, 0x1, 0xf4, 0x38, 0xac, 0x15, 0x48 } }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/jit/ICorJitInfo_API_names.h b/src/coreclr/jit/ICorJitInfo_API_names.h index f29e37f91baae3..0589c9b4a166c5 100644 --- a/src/coreclr/jit/ICorJitInfo_API_names.h +++ b/src/coreclr/jit/ICorJitInfo_API_names.h @@ -160,6 +160,7 @@ DEF_CLR_API(MethodCompileComplete) DEF_CLR_API(getTailCallHelpers) DEF_CLR_API(convertPInvokeCalliToCall) DEF_CLR_API(notifyInstructionSetUsage) +DEF_CLR_API(updateEntryPointForTailCall) DEF_CLR_API(allocMem) DEF_CLR_API(reserveUnwindInfo) DEF_CLR_API(allocUnwindInfo) diff --git a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp index 66292765480a38..72b12bd6cb1b7c 100644 --- a/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp +++ b/src/coreclr/jit/ICorJitInfo_API_wrapper.hpp @@ -1527,6 +1527,14 @@ bool WrapICorJitInfo::notifyInstructionSetUsage( return temp; } +void WrapICorJitInfo::updateEntryPointForTailCall( + CORINFO_CONST_LOOKUP* entryPoint) +{ + API_ENTER(updateEntryPointForTailCall); + wrapHnd->updateEntryPointForTailCall(entryPoint); + API_LEAVE(updateEntryPointForTailCall); +} + void WrapICorJitInfo::allocMem( AllocMemArgs* pArgs) { diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 717a93ddbc9618..c67f23c6232e22 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -2364,15 +2364,12 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Use IP0 on ARM64 and R12 on ARM32 as the call target register. inst_Mov(TYP_I_IMPL, REG_FASTTAILCALL_TARGET, target->GetRegNum(), /* canSkip */ true); } -#ifdef FEATURE_READYTORUN_COMPILER else if (call->IsR2RRelativeIndir()) { assert(call->gtEntryPoint.accessType == IAT_PVALUE); assert(call->gtControlExpr == nullptr); - // Call target is in REG_R2R_INDIRECT_PARAM. genFnEpilog knows to look for it there - // instead of from REG_FASTTAILCALL_TARGET. 
+ GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET, REG_R2R_INDIRECT_PARAM); } -#endif return; } diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ad6de4ef10250c..67d277e84840f3 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8025,12 +8025,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) // target address will be in gtDirectCallAddress. It is still possible that calls // to user funcs require indirection, in which case the control expression will // be non-null. - bool isR2rRelIndir = false; -#ifdef FEATURE_READYTORUN_COMPILER - isR2rRelIndir = call->IsR2RRelativeIndir(); -#endif - if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr) && !isR2rRelIndir) + if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr) && !call->IsR2RRelativeIndir()) { assert(call->gtCallMethHnd != nullptr); // clang-format off @@ -8054,10 +8050,10 @@ void CodeGen::genFnEpilog(BasicBlock* block) } else { - // Target requires indirection to obtain. genCallInstruction will have materialized - // it into REG_R2R_INDIRECT_PARAM/REG_FASTTAILCALL_TARGET already, so just branch to it. - regNumber reg = isR2rRelIndir ? REG_R2R_INDIRECT_PARAM : REG_FASTTAILCALL_TARGET; - GetEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), reg); + // Target requires indirection to obtain. genCallInstruction + // will have materialized it into REG_FASTTAILCALL_TARGET + // already, so just branch to it. + GetEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); } } #endif // FEATURE_FASTTAILCALL @@ -8470,21 +8466,42 @@ void CodeGen::genFnEpilog(BasicBlock* block) if ((callType == CT_USER_FUNC) && (call->gtControlExpr == nullptr)) { assert(call->gtCallMethHnd != nullptr); + // clang-format off - GetEmitter()->emitIns_Call( - emitter::EC_FUNC_TOKEN, - call->gtCallMethHnd, - INDEBUG_LDISASM_COMMA(nullptr) - call->gtDirectCallAddress, - 0, // argSize - EA_UNKNOWN // retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),// secondRetSize - gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, - BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ - true /* isJump */ - ); + if (call->IsR2RRelativeIndir()) + { + GetEmitter()->emitIns_Call( + emitter::EC_INDIR_ARD, + call->gtCallMethHnd, + INDEBUG_LDISASM_COMMA(nullptr) + 0, // addr + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),// secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, REG_RAX, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ + true /* isJump */ + ); + } + else + { + GetEmitter()->emitIns_Call( + emitter::EC_FUNC_TOKEN, + call->gtCallMethHnd, + INDEBUG_LDISASM_COMMA(nullptr) + call->gtDirectCallAddress, + 0, // argSize + EA_UNKNOWN // retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),// secondRetSize + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ + true /* isJump */ + ); + } // clang-format on } else diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 959e06e8b9ffb8..88698a7f4c4223 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5191,6 +5191,12 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // If this is indirect then we go through RAX with epilog 
sequence // generating "jmp rax". Otherwise epilog will try to generate a // rip-relative jump. + // Note that the indirection cell is required to be in RAX for R2R delay load helper + // that expects to find it there (see ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell + // in crossgen2). We handle this by marking the target as contained and then emitting + // mov rax, cell + // jmp [rax] + // in codegen. if (target != nullptr && !target->isContained()) { genConsumeReg(target); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a786484c42a2fe..e3e4bf89e61d2f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6057,7 +6057,7 @@ class Compiler GenTree* fgMorphPotentialTailCall(GenTreeCall* call); GenTree* fgGetStubAddrArg(GenTreeCall* call); - unsigned fgGetArgTabEntryParameterLclNum(fgArgTabEntry* argTabEntry); + unsigned fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEntry* argTabEntry); void fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall); Statement* fgAssignRecursiveCallArgToCallerParam(GenTree* arg, fgArgTabEntry* argTabEntry, diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2c8ad35b8cdb0f..3f91ea213dd2d7 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4598,11 +4598,15 @@ struct GenTreeCall final : public GenTree return (gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0; } -#ifdef FEATURE_READYTORUN_COMPILER bool IsR2RRelativeIndir() const { +#ifdef FEATURE_READYTORUN_COMPILER return (gtCallMoreFlags & GTF_CALL_M_R2R_REL_INDIRECT) != 0; +#else + return false; +#endif } +#ifdef FEATURE_READYTORUN_COMPILER void setEntryPoint(const CORINFO_CONST_LOOKUP& entryPoint) { gtEntryPoint = entryPoint; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a328206151a001..00801ef75c6d07 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3705,15 +3705,19 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call) case IAT_PVALUE: { - bool isR2RRelativeIndir = false; -#if defined(FEATURE_READYTORUN_COMPILER) && defined(TARGET_ARMARCH) + bool hasIndirectionCell = false; +#if defined(TARGET_ARMARCH) // Skip inserting the indirection node to load the address that is already // computed in REG_R2R_INDIRECT_PARAM as a hidden parameter. Instead during the // codegen, just load the call target from REG_R2R_INDIRECT_PARAM. - isR2RRelativeIndir = call->IsR2RRelativeIndir(); -#endif // FEATURE_READYTORUN_COMPILER && TARGET_ARMARCH + hasIndirectionCell = call->IsR2RRelativeIndir(); +#elif defined(TARGET_XARCH) + // For xarch we usually get the indirection cell from the return address, + // except for fast tailcalls where we do the same as ARM. + hasIndirectionCell = call->IsR2RRelativeIndir() && call->IsFastTailCall(); +#endif - if (!isR2RRelativeIndir) + if (!hasIndirectionCell) { // Non-virtual direct calls to addresses accessed by // a single indirection. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index ace9d543881a94..8fb99594b3c41d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4487,14 +4487,21 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) bool canContainIndir = true; if (call->IsFastTailCall()) { - // Currently we only allow fast tailcalls with indirections when no registers are required in the - // indirection. 
- // This is to ensure we won't need a register for the addressing mode since registers will have been - // cleaned up - // by the epilog at this point. + // Currently we only allow fast tailcalls with indirections + // when no registers are required in the indirection. This is + // to ensure we won't need a register for the addressing mode + // since registers will have been cleaned up by the epilog at + // this point. canContainIndir = ctrlExpr->AsIndir()->HasBase() && ctrlExpr->AsIndir()->Base()->isContainedIntOrIImmed() && !ctrlExpr->AsIndir()->HasIndex(); + + // For R2R we cannot allow containment as the delay load helper + // expects the indirection cell to be left in a register. +#ifdef FEATURE_READYTORUN_COMPILER + if (call->gtEntryPoint.addr != nullptr) + canContainIndir = false; +#endif } if (canContainIndir) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2070f4ae975905..dd6c851c66548b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2831,10 +2831,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallType = CT_HELPER; call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); } -#if defined(FEATURE_READYTORUN_COMPILER) && defined(TARGET_ARMARCH) +#if defined(FEATURE_READYTORUN_COMPILER) +#if defined(TARGET_ARMARCH) // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg() // for indirection cell address, which ZapIndirectHelperThunk expects. - if (call->IsR2RRelativeIndir()) + if (call->IsR2RRelativeIndir() && !call->IsDelegateInvoke()) { assert(call->gtEntryPoint.addr != nullptr); @@ -2858,8 +2859,33 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) numArgs++; nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); } +#elif defined(TARGET_XARCH) + // For xarch fast tailcalls we do the same as ARM, leaving the indirection cell in the + // fast tailcall register. + // Note that we call this before we know if something will be a fast tailcall or not. + // That's ok; after making something a tailcall, we will invalidate this information + // and reconstruct it if necessary. The tailcalling decision does not change since + // this is a non-standard arg in a register. + if (call->IsR2RRelativeIndir() && call->IsFastTailCall() && !call->IsDelegateInvoke()) + { + assert(call->gtEntryPoint.addr != nullptr); -#endif // FEATURE_READYTORUN_COMPILER && TARGET_ARMARCH + size_t addrValue = (size_t)call->gtEntryPoint.addr; + GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); +#ifdef DEBUG + indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; +#endif + indirectCellAddress->SetRegNum(REG_RAX); + + // Push the stub address onto the list of arguments. + call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); + + numArgs++; + nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); + } + +#endif +#endif // Allocate the fgArgInfo for the call node; // @@ -7663,6 +7689,22 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif + // For R2R we might need a different entry point for this call if we are doing a tailcall. + // The reason is that the normal delay load helper uses the return address to find the indirection + // cell in x64, but now the JIT is expected to leave the indirection cell in rax. + // We optimize delegate invocations manually in the JIT so skip this for those. 
+#ifdef FEATURE_READYTORUN_COMPILER + if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke()) + { + info.compCompHnd->updateEntryPointForTailCall(&call->gtEntryPoint); + +#ifdef TARGET_XARCH + // We need to redo arg info to add the indirection cell arg. + call->fgArgInfo = nullptr; +#endif + } +#endif + // If this block has a flow successor, make suitable updates. // BasicBlock* const nextBlock = compCurBB->GetUniqueSucc(); @@ -8846,11 +8888,11 @@ GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call) // Arguments: // argTabEntry - the arg // -unsigned fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEntry* argTabEntry) +unsigned Compiler::fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEntry* argTabEntry) { - fgArgInfo* argInfo = call->fgArgInfo; - unsigned argCount = argInfo->ArgCount(); - fgArgTabEntry** argTable = argInfo->ArgTable(); + fgArgInfo* argInfo = call->fgArgInfo; + unsigned argCount = argInfo->ArgCount(); + fgArgTabEntry** argTable = argInfo->ArgTable(); unsigned numNonStandardBefore = 0; for (unsigned i = 0; i < argCount; i++) @@ -8957,9 +8999,11 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa if (!curArgTabEntry->isNonStandard) { Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, fgGetArgTabEntryParameterLclNum(recursiveTailCall, curArgTabEntry), - block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, + fgGetArgTabEntryParameterLclNum(recursiveTailCall, + curArgTabEntry), + block, callILOffset, tmpAssignmentInsertionPoint, + paramAssignmentInsertionPoint); if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) { // All temp assignments will happen before the first param assignment. 
@@ -8981,8 +9025,11 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa if (!curArgTabEntry->isNonStandard) { Statement* paramAssignStmt = - fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, fgGetArgTabEntryParameterLclNum(recursiveTailCall, curArgTabEntry), block, callILOffset, - tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint); + fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, + fgGetArgTabEntryParameterLclNum(recursiveTailCall, + curArgTabEntry), + block, callILOffset, tmpAssignmentInsertionPoint, + paramAssignmentInsertionPoint); if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr)) { diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs index db9eb3f137a588..a1c88aa19e4ed9 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs @@ -2317,6 +2317,20 @@ static byte _notifyInstructionSetUsage(IntPtr thisHandle, IntPtr* ppException, I } } + [UnmanagedCallersOnly] + static void _updateEntryPointForTailCall(IntPtr thisHandle, IntPtr* ppException, CORINFO_CONST_LOOKUP* entryPoint) + { + var _this = GetThis(thisHandle); + try + { + _this.updateEntryPointForTailCall(ref *entryPoint); + } + catch (Exception ex) + { + *ppException = _this.AllocException(ex); + } + } + [UnmanagedCallersOnly] static void _allocMem(IntPtr thisHandle, IntPtr* ppException, AllocMemArgs* pArgs) { @@ -2552,7 +2566,7 @@ static uint _getJitFlags(IntPtr thisHandle, IntPtr* ppException, CORJIT_FLAGS* f static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 172); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 173); callbacks[0] = (delegate* unmanaged)&_isJitIntrinsic; callbacks[1] = (delegate* unmanaged)&_getMethodAttribs; @@ -2710,22 +2724,23 @@ static IntPtr GetUnmanagedCallbacks() callbacks[153] = (delegate* unmanaged)&_getTailCallHelpers; callbacks[154] = (delegate* unmanaged)&_convertPInvokeCalliToCall; callbacks[155] = (delegate* unmanaged)&_notifyInstructionSetUsage; - callbacks[156] = (delegate* unmanaged)&_allocMem; - callbacks[157] = (delegate* unmanaged)&_reserveUnwindInfo; - callbacks[158] = (delegate* unmanaged)&_allocUnwindInfo; - callbacks[159] = (delegate* unmanaged)&_allocGCInfo; - callbacks[160] = (delegate* unmanaged)&_setEHcount; - callbacks[161] = (delegate* unmanaged)&_setEHinfo; - callbacks[162] = (delegate* unmanaged)&_logMsg; - callbacks[163] = (delegate* unmanaged)&_doAssert; - callbacks[164] = (delegate* unmanaged)&_reportFatalError; - callbacks[165] = (delegate* unmanaged)&_getPgoInstrumentationResults; - callbacks[166] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; - callbacks[167] = (delegate* unmanaged)&_recordCallSite; - callbacks[168] = (delegate* unmanaged)&_recordRelocation; - callbacks[169] = (delegate* unmanaged)&_getRelocTypeHint; - callbacks[170] = (delegate* unmanaged)&_getExpectedTargetArchitecture; - callbacks[171] = (delegate* unmanaged)&_getJitFlags; + callbacks[156] = (delegate* unmanaged)&_updateEntryPointForTailCall; + callbacks[157] = (delegate* unmanaged)&_allocMem; + callbacks[158] = (delegate* unmanaged)&_reserveUnwindInfo; + callbacks[159] = (delegate* unmanaged)&_allocUnwindInfo; + callbacks[160] = (delegate* unmanaged)&_allocGCInfo; + callbacks[161] = (delegate* unmanaged)&_setEHcount; + callbacks[162] = (delegate* unmanaged)&_setEHinfo; + callbacks[163] = 
(delegate* unmanaged)&_logMsg; + callbacks[164] = (delegate* unmanaged)&_doAssert; + callbacks[165] = (delegate* unmanaged)&_reportFatalError; + callbacks[166] = (delegate* unmanaged)&_getPgoInstrumentationResults; + callbacks[167] = (delegate* unmanaged)&_allocPgoInstrumentationBySchema; + callbacks[168] = (delegate* unmanaged)&_recordCallSite; + callbacks[169] = (delegate* unmanaged)&_recordRelocation; + callbacks[170] = (delegate* unmanaged)&_getRelocTypeHint; + callbacks[171] = (delegate* unmanaged)&_getExpectedTargetArchitecture; + callbacks[172] = (delegate* unmanaged)&_getJitFlags; return (IntPtr)callbacks; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt index 22e390f52858ea..1d417cffca7779 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt @@ -309,6 +309,7 @@ FUNCTIONS bool getTailCallHelpers(CORINFO_RESOLVED_TOKEN* callToken, CORINFO_SIG_INFO* sig, CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult); bool convertPInvokeCalliToCall(CORINFO_RESOLVED_TOKEN * pResolvedToken, bool mustConvert); bool notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet,bool supportEnabled); + void updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint); void allocMem(AllocMemArgs* pArgs); void reserveUnwindInfo(bool isFunclet, bool isColdCode, uint32_t unwindSize) void allocUnwindInfo(uint8_t* pHotCode, uint8_t* pColdCode, uint32_t startOffset, uint32_t endOffset, uint32_t unwindSize, uint8_t* pUnwindBlock, CorJitFuncKind funcKind) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs index d921697870ccef..1b2674ab009bf2 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperImport.cs @@ -17,6 +17,7 @@ public class DelayLoadHelperImport : Import private readonly ReadyToRunHelper _helper; private readonly bool _useVirtualCall; + private readonly bool _useJumpableStub; private readonly ImportThunk _delayLoadHelper; @@ -26,12 +27,14 @@ public DelayLoadHelperImport( ReadyToRunHelper helper, Signature instanceSignature, bool useVirtualCall = false, + bool useJumpableStub = false, MethodDesc callingMethod = null) : base(importSectionNode, instanceSignature, callingMethod) { _helper = helper; _useVirtualCall = useVirtualCall; - _delayLoadHelper = factory.ImportThunk(helper, importSectionNode, useVirtualCall); + _useJumpableStub = useJumpableStub; + _delayLoadHelper = factory.ImportThunk(helper, importSectionNode, useVirtualCall, useJumpableStub); } public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) @@ -41,6 +44,10 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde { sb.Append("[VSD] "); } + if (_useJumpableStub) + { + sb.Append("[JMP] "); + } sb.Append(_helper.ToString()); sb.Append(") -> "); ImportSignature.AppendMangledName(nameMangler, sb); @@ -79,7 +86,11 @@ public override IEnumerable GetStaticDependencies(NodeFacto public override int CompareToImpl(ISortableNode other, CompilerComparer comparer) { DelayLoadHelperImport otherNode = 
(DelayLoadHelperImport)other; - int result = _useVirtualCall.CompareTo(otherNode._useVirtualCall); + int result = _useJumpableStub.CompareTo(otherNode._useJumpableStub); + if (result != 0) + return result; + + result = _useVirtualCall.CompareTo(otherNode._useVirtualCall); if (result != 0) return result; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs index 3bf386801c68e2..d034ba56d87b2f 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadHelperMethodImport.cs @@ -30,7 +30,7 @@ public DelayLoadHelperMethodImport( bool useInstantiatingStub, Signature instanceSignature, MethodDesc callingMethod = null) - : base(factory, importSectionNode, helper, instanceSignature, useVirtualCall, callingMethod) + : base(factory, importSectionNode, helper, instanceSignature, useVirtualCall, useJumpableStub: false, callingMethod) { _method = method; _useInstantiatingStub = useInstantiatingStub; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadMethodImport.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadMethodImport.cs index 47451cadd92f7f..dd0d49c8db19c2 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadMethodImport.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DelayLoadMethodImport.cs @@ -12,14 +12,14 @@ namespace ILCompiler.DependencyAnalysis.ReadyToRun public class DelayLoadMethodImport : DelayLoadHelperImport, IMethodNode { private readonly MethodWithGCInfo _localMethod; - private readonly MethodWithToken _method; public DelayLoadMethodImport( NodeFactory factory, ReadyToRunFixupKind fixupKind, MethodWithToken method, MethodWithGCInfo localMethod, - bool isInstantiatingStub) + bool isInstantiatingStub, + bool isJump) : base( factory, factory.MethodImports, @@ -27,13 +27,15 @@ public DelayLoadMethodImport( factory.MethodSignature( fixupKind, method, - isInstantiatingStub)) + isInstantiatingStub), + useJumpableStub: isJump) { _localMethod = localMethod; - _method = method; + MethodWithToken = method; } - public MethodDesc Method => _method.Method; + public MethodWithToken MethodWithToken { get; } + public MethodDesc Method => MethodWithToken.Method; public MethodWithGCInfo MethodCodeNode => _localMethod; public override int ClassCode => 459923351; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportThunk.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportThunk.cs index 44525421f9b28c..3895244c001972 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportThunk.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ImportThunk.cs @@ -18,6 +18,7 @@ enum Kind Eager, Lazy, DelayLoadHelper, + DelayLoadHelperWithExistingIndirectionCell, VirtualStubDispatch, } @@ -31,7 +32,7 @@ enum Kind /// Import thunks are used to call a runtime-provided helper which fixes up an indirection cell in a particular /// import section. 
Optionally they may also contain a relocation for a specific indirection cell to fix up. /// - public ImportThunk(NodeFactory factory, ReadyToRunHelper helperId, ImportSectionNode containingImportSection, bool useVirtualCall) + public ImportThunk(NodeFactory factory, ReadyToRunHelper helperId, ImportSectionNode containingImportSection, bool useVirtualCall, bool useJumpableStub) { _helperCell = factory.GetReadyToRunHelperCell(helperId); _containingImportSection = containingImportSection; @@ -40,6 +41,10 @@ public ImportThunk(NodeFactory factory, ReadyToRunHelper helperId, ImportSection { _thunkKind = Kind.VirtualStubDispatch; } + else if (useJumpableStub) + { + _thunkKind = Kind.DelayLoadHelperWithExistingIndirectionCell; + } else if (helperId == ReadyToRunHelper.GetString) { _thunkKind = Kind.Lazy; @@ -61,7 +66,7 @@ public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilde { sb.Append("DelayLoadHelper->"); _helperCell.AppendMangledName(nameMangler, sb); - sb.Append($"(ImportSection:{_containingImportSection.Name})"); + sb.Append($"(ImportSection:{_containingImportSection.Name},Kind:{_thunkKind})"); } protected override string GetName(NodeFactory factory) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodFixupSignature.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodFixupSignature.cs index eaeddd907ea5a6..1de6cfcfa9251b 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodFixupSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/MethodFixupSignature.cs @@ -21,8 +21,6 @@ public class MethodFixupSignature : Signature private readonly MethodWithToken _method; - private readonly bool _isInstantiatingStub; - public MethodFixupSignature( ReadyToRunFixupKind fixupKind, MethodWithToken method, @@ -30,7 +28,7 @@ public MethodFixupSignature( { _fixupKind = fixupKind; _method = method; - _isInstantiatingStub = isInstantiatingStub; + IsInstantiatingStub = isInstantiatingStub; // Ensure types in signature are loadable and resolvable, otherwise we'll fail later while emitting the signature CompilerTypeSystemContext compilerContext = (CompilerTypeSystemContext)method.Method.Context; @@ -42,6 +40,7 @@ public MethodFixupSignature( } public MethodDesc Method => _method.Method; + public bool IsInstantiatingStub { get; } public override int ClassCode => 150063499; @@ -61,7 +60,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) // Optimize some of the fixups into a more compact form ReadyToRunFixupKind fixupKind = _fixupKind; bool optimized = false; - if (!_method.Unboxing && !_isInstantiatingStub && _method.ConstrainedType == null && + if (!_method.Unboxing && !IsInstantiatingStub && _method.ConstrainedType == null && fixupKind == ReadyToRunFixupKind.MethodEntry) { if (!_method.Method.HasInstantiation && !_method.Method.OwningType.HasInstantiation && !_method.Method.OwningType.IsArray) @@ -111,7 +110,7 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) } else { - dataBuilder.EmitMethodSignature(method, enforceDefEncoding: false, enforceOwningType: false, innerContext, _isInstantiatingStub); + dataBuilder.EmitMethodSignature(method, enforceDefEncoding: false, enforceOwningType: false, innerContext, IsInstantiatingStub); } return dataBuilder.ToObjectData(); @@ -122,7 +121,7 @@ public override void AppendMangledName(NameMangler 
nameMangler, Utf8StringBuilde sb.Append(nameMangler.CompilationUnitPrefix); sb.Append($@"MethodFixupSignature("); sb.Append(_fixupKind.ToString()); - if (_isInstantiatingStub) + if (IsInstantiatingStub) { sb.Append(" [INST]"); } @@ -137,7 +136,7 @@ public override int CompareToImpl(ISortableNode other, CompilerComparer comparer if (result != 0) return result; - result = _isInstantiatingStub.CompareTo(otherNode._isInstantiatingStub); + result = IsInstantiatingStub.CompareTo(otherNode.IsInstantiatingStub); if (result != 0) return result; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM/ImportThunk.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM/ImportThunk.cs index ffe0835729ac89..178e29f30848e7 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM/ImportThunk.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM/ImportThunk.cs @@ -30,6 +30,7 @@ protected override void EmitCode(NodeFactory factory, ref ARM.ARMEmitter instruc case Kind.DelayLoadHelper: case Kind.VirtualStubDispatch: + case Kind.DelayLoadHelperWithExistingIndirectionCell: // r4 contains indirection cell // push r4 instructionEncoder.EmitPUSH(Register.R4); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM64/ImportThunk.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM64/ImportThunk.cs index a3be3e0b3e5cda..ba619129a989bb 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM64/ImportThunk.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_ARM64/ImportThunk.cs @@ -22,6 +22,7 @@ protected override void EmitCode(NodeFactory factory, ref ARM64Emitter instructi break; case Kind.DelayLoadHelper: + case Kind.DelayLoadHelperWithExistingIndirectionCell: case Kind.VirtualStubDispatch: // x11 contains indirection cell // Do nothing x11 contains our first param diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X64/ImportThunk.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X64/ImportThunk.cs index 439a022549993f..4c34b97edcb637 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X64/ImportThunk.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X64/ImportThunk.cs @@ -37,6 +37,20 @@ protected override void EmitCode(NodeFactory factory, ref X64Emitter instruction break; + case Kind.DelayLoadHelperWithExistingIndirectionCell: + // Indirection cell is already in rax which will be first arg. Used for fast tailcalls. 
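                    // Rough shape of the thunk for this kind (a sketch, not the verbatim emitted bytes): unlike
                    // Kind.DelayLoadHelper, the helper does not recover the indirection cell from the return
                    // address; it expects the JIT to have left the cell in rax before jumping here, and the thunk
                    // then pushes the import section index and the module and jumps to the delay-load helper.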
+ + if (!relocsOnly) + { + // push table index + instructionEncoder.EmitPUSH((sbyte)_containingImportSection.IndexFromBeginningOfArray); + } + + // push [module] + instructionEncoder.EmitPUSH(factory.ModuleImport); + + break; + case Kind.VirtualStubDispatch: // mov rax, r11 - this is the most general case as the value of R11 also propagates // to the new method after the indirection cell has been updated so the cell content diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X86/ImportThunk.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X86/ImportThunk.cs index 94f5bcb5858c14..5ae8df415f54fd 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X86/ImportThunk.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/Target_X86/ImportThunk.cs @@ -41,6 +41,9 @@ protected override void EmitCode(NodeFactory factory, ref X86Emitter instruction // mov edx, [module] instructionEncoder.EmitMOV(Register.EDX, factory.ModuleImport); break; + + default: + throw new NotSupportedException(_thunkKind.ToString() + " is not supported"); } instructionEncoder.EmitJMP(_helperCell); } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs index da9c4592b549a6..4fd1f958d9c3b1 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunCodegenNodeFactory.cs @@ -219,7 +219,7 @@ private void CreateNodeCaches() _importThunks = new NodeCache(key => { - return new ImportThunk(this, key.Helper, key.ContainingImportSection, key.UseVirtualCall); + return new ImportThunk(this, key.Helper, key.ContainingImportSection, key.UseVirtualCall, key.UseJumpableStub); }); _importMethods = new NodeCache(CreateMethodEntrypoint); @@ -377,6 +377,7 @@ private IMethodNode CreateMethodEntrypoint(TypeAndMethod key) if (isPrecodeImportRequired) { + Debug.Assert(!key.IsJumpableImportRequired); return new PrecodeMethodImport( this, ReadyToRunFixupKind.MethodEntry, @@ -391,13 +392,15 @@ private IMethodNode CreateMethodEntrypoint(TypeAndMethod key) ReadyToRunFixupKind.MethodEntry, method, methodWithGCInfo, - isInstantiatingStub); + isInstantiatingStub, + isJump: key.IsJumpableImportRequired); } } - public IMethodNode MethodEntrypoint(MethodWithToken method, bool isInstantiatingStub, bool isPrecodeImportRequired) + public IMethodNode MethodEntrypoint(MethodWithToken method, bool isInstantiatingStub, bool isPrecodeImportRequired, bool isJumpableImportRequired) { - TypeAndMethod key = new TypeAndMethod(method.ConstrainedType, method, isInstantiatingStub, isPrecodeImportRequired); + Debug.Assert(!isJumpableImportRequired || !isPrecodeImportRequired); + TypeAndMethod key = new TypeAndMethod(method.ConstrainedType, method, isInstantiatingStub, isPrecodeImportRequired, isJumpableImportRequired); return _importMethods.GetOrAdd(key); } @@ -416,7 +419,7 @@ public IEnumerable EnumerateCompiledMethods(EcmaModule moduleT EcmaModule module = ((EcmaMethod)method.GetTypicalMethodDefinition()).Module; ModuleToken moduleToken = Resolver.GetModuleTokenForMethod(method, throwIfNotFound: true); - IMethodNode methodNodeDebug = MethodEntrypoint(new MethodWithToken(method, 
moduleToken, constrainedType: null, unboxing: false, context: null), false, false); + IMethodNode methodNodeDebug = MethodEntrypoint(new MethodWithToken(method, moduleToken, constrainedType: null, unboxing: false, context: null), false, false, false); MethodWithGCInfo methodCodeNodeDebug = methodNodeDebug as MethodWithGCInfo; if (methodCodeNodeDebug == null && methodNodeDebug is DelayLoadMethodImport DelayLoadMethodImport) { @@ -470,7 +473,7 @@ public MethodFixupSignature MethodSignature( MethodWithToken method, bool isInstantiatingStub) { - TypeAndMethod key = new TypeAndMethod(method.ConstrainedType, method, isInstantiatingStub, false); + TypeAndMethod key = new TypeAndMethod(method.ConstrainedType, method, isInstantiatingStub, false, false); return _methodSignatures.GetOrAdd(new MethodFixupKey(fixupKind, key)); } @@ -558,19 +561,22 @@ private struct ImportThunkKey : IEquatable public readonly ReadyToRunHelper Helper; public readonly ImportSectionNode ContainingImportSection; public readonly bool UseVirtualCall; + public readonly bool UseJumpableStub; - public ImportThunkKey(ReadyToRunHelper helper, ImportSectionNode containingImportSection, bool useVirtualCall) + public ImportThunkKey(ReadyToRunHelper helper, ImportSectionNode containingImportSection, bool useVirtualCall, bool useJumpableStub) { Helper = helper; ContainingImportSection = containingImportSection; UseVirtualCall = useVirtualCall; + UseJumpableStub = useJumpableStub; } public bool Equals(ImportThunkKey other) { return Helper == other.Helper && ContainingImportSection == other.ContainingImportSection && - UseVirtualCall == other.UseVirtualCall; + UseVirtualCall == other.UseVirtualCall && + UseJumpableStub == other.UseJumpableStub; } public override bool Equals(object obj) @@ -582,15 +588,16 @@ public override int GetHashCode() { return unchecked(31 * Helper.GetHashCode() + 31 * ContainingImportSection.GetHashCode() + - 31 * UseVirtualCall.GetHashCode()); + 31 * UseVirtualCall.GetHashCode() + + 31 * UseJumpableStub.GetHashCode()); } } private NodeCache _importThunks; - public ImportThunk ImportThunk(ReadyToRunHelper helper, ImportSectionNode containingImportSection, bool useVirtualCall) + public ImportThunk ImportThunk(ReadyToRunHelper helper, ImportSectionNode containingImportSection, bool useVirtualCall, bool useJumpableStub) { - ImportThunkKey thunkKey = new ImportThunkKey(helper, containingImportSection, useVirtualCall); + ImportThunkKey thunkKey = new ImportThunkKey(helper, containingImportSection, useVirtualCall, useJumpableStub); return _importThunks.GetOrAdd(thunkKey); } @@ -693,13 +700,13 @@ public void AttachToDependencyGraph(DependencyAnalyzerBase graph) Import personalityRoutineImport = new Import(EagerImports, new ReadyToRunHelperSignature( ReadyToRunHelper.PersonalityRoutine)); PersonalityRoutine = new ImportThunk(this, - ReadyToRunHelper.PersonalityRoutine, EagerImports, useVirtualCall: false); + ReadyToRunHelper.PersonalityRoutine, EagerImports, useVirtualCall: false, useJumpableStub: false); graph.AddRoot(PersonalityRoutine, "Personality routine is faster to root early rather than referencing it from each unwind info"); Import filterFuncletPersonalityRoutineImport = new Import(EagerImports, new ReadyToRunHelperSignature( ReadyToRunHelper.PersonalityRoutineFilterFunclet)); FilterFuncletPersonalityRoutine = new ImportThunk(this, - ReadyToRunHelper.PersonalityRoutineFilterFunclet, EagerImports, useVirtualCall: false); + ReadyToRunHelper.PersonalityRoutineFilterFunclet, EagerImports, useVirtualCall: false, 
useJumpableStub: false); graph.AddRoot(FilterFuncletPersonalityRoutine, "Filter funclet personality routine is faster to root early rather than referencing it from each unwind info"); } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs index 38b1893f531628..02cc4d4edada9b 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs @@ -118,7 +118,8 @@ private void CreateNodeCaches() IMethodNode targetMethodNode = _codegenNodeFactory.MethodEntrypoint( ctorKey.Method, isInstantiatingStub: ctorKey.Method.Method.HasInstantiation, - isPrecodeImportRequired: false); + isPrecodeImportRequired: false, + isJumpableImportRequired: false); return new DelayLoadHelperImport( _codegenNodeFactory, @@ -435,7 +436,8 @@ public ISymbolNode DelegateCtor(TypeDesc delegateType, MethodWithToken method) delegateType, method, isInstantiatingStub: false, - isPrecodeImportRequired: false); + isPrecodeImportRequired: false, + isJumpableImportRequired: false); return _delegateCtors.GetOrAdd(ctorKey); } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/TypeAndMethod.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/TypeAndMethod.cs index 7b92613a2ab500..16843d70599a6a 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/TypeAndMethod.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/TypeAndMethod.cs @@ -16,13 +16,15 @@ internal struct TypeAndMethod : IEquatable public readonly MethodWithToken Method; public readonly bool IsInstantiatingStub; public readonly bool IsPrecodeImportRequired; + public readonly bool IsJumpableImportRequired; - public TypeAndMethod(TypeDesc type, MethodWithToken method, bool isInstantiatingStub, bool isPrecodeImportRequired) + public TypeAndMethod(TypeDesc type, MethodWithToken method, bool isInstantiatingStub, bool isPrecodeImportRequired, bool isJumpableImportRequired) { Type = type; Method = method; IsInstantiatingStub = isInstantiatingStub; IsPrecodeImportRequired = isPrecodeImportRequired; + IsJumpableImportRequired = isJumpableImportRequired; } public bool Equals(TypeAndMethod other) @@ -30,7 +32,8 @@ public bool Equals(TypeAndMethod other) return Type == other.Type && Method.Equals(other.Method) && IsInstantiatingStub == other.IsInstantiatingStub && - IsPrecodeImportRequired == other.IsPrecodeImportRequired; + IsPrecodeImportRequired == other.IsPrecodeImportRequired && + IsJumpableImportRequired == other.IsJumpableImportRequired; } public override bool Equals(object obj) @@ -43,7 +46,8 @@ public override int GetHashCode() return (Type?.GetHashCode() ?? 0) ^ unchecked(Method.GetHashCode() * 31) ^ (IsInstantiatingStub ? 0x40000000 : 0) ^ - (IsPrecodeImportRequired ? 0x20000000 : 0); + (IsPrecodeImportRequired ? 0x20000000 : 0) ^ + (IsJumpableImportRequired ? 
0x10000000 : 0); } } } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index f3701699cca617..b11bb77a021175 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1943,7 +1943,8 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO _compilation.NodeFactory.MethodEntrypoint( ComputeMethodWithToken(nonUnboxingMethod, ref pResolvedToken, constrainedType, unboxing: isUnboxingStub), isInstantiatingStub: useInstantiatingStub, - isPrecodeImportRequired: (flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) != 0)); + isPrecodeImportRequired: (flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) != 0, + isJumpableImportRequired: false)); } // If the abi of the method isn't stable, this will cause a usage of the RequiresRuntimeJitSymbol, which will trigger a RequiresRuntimeJitException @@ -2625,5 +2626,27 @@ private void reportInliningDecision(CORINFO_METHOD_STRUCT_* inlinerHnd, CORINFO_ _inlinedMethods.Add(inlinee); } } + + private void updateEntryPointForTailCall(ref CORINFO_CONST_LOOKUP entryPoint) + { + // In x64 we normally use a return address to find the indirection cell for delay load. + // For tailcalls we instead expect the JIT to leave the indirection in rax. + if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.X64) + return; + + object node = HandleToObject((IntPtr)entryPoint.addr); + if (node is not DelayLoadMethodImport imp) + return; + + Debug.Assert(imp.GetType() == typeof(DelayLoadMethodImport)); + IMethodNode newEntryPoint = + _compilation.NodeFactory.MethodEntrypoint( + imp.MethodWithToken, + ((MethodFixupSignature)imp.ImportSignature.Target).IsInstantiatingStub, + false, + true); + + entryPoint = CreateConstLookupToSymbol(newEntryPoint); + } } } diff --git a/src/coreclr/tools/aot/jitinterface/jitinterface.h b/src/coreclr/tools/aot/jitinterface/jitinterface.h index 86591df5df32f9..cd7e43cbe9d9ed 100644 --- a/src/coreclr/tools/aot/jitinterface/jitinterface.h +++ b/src/coreclr/tools/aot/jitinterface/jitinterface.h @@ -167,6 +167,7 @@ struct JitInterfaceCallbacks bool (* getTailCallHelpers)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* callToken, CORINFO_SIG_INFO* sig, CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult); bool (* convertPInvokeCalliToCall)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_RESOLVED_TOKEN* pResolvedToken, bool mustConvert); bool (* notifyInstructionSetUsage)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_InstructionSet instructionSet, bool supportEnabled); + void (* updateEntryPointForTailCall)(void * thisHandle, CorInfoExceptionClass** ppException, CORINFO_CONST_LOOKUP* entryPoint); void (* allocMem)(void * thisHandle, CorInfoExceptionClass** ppException, AllocMemArgs* pArgs); void (* reserveUnwindInfo)(void * thisHandle, CorInfoExceptionClass** ppException, bool isFunclet, bool isColdCode, uint32_t unwindSize); void (* allocUnwindInfo)(void * thisHandle, CorInfoExceptionClass** ppException, uint8_t* pHotCode, uint8_t* pColdCode, uint32_t startOffset, uint32_t endOffset, uint32_t unwindSize, uint8_t* pUnwindBlock, CorJitFuncKind funcKind); @@ -1695,6 +1696,14 @@ class JitInterfaceWrapper : public ICorJitInfo return temp; } + virtual void 
updateEntryPointForTailCall( + CORINFO_CONST_LOOKUP* entryPoint) +{ + CorInfoExceptionClass* pException = nullptr; + _callbacks->updateEntryPointForTailCall(_thisHandle, &pException, entryPoint); + if (pException != nullptr) throw pException; +} + virtual void allocMem( AllocMemArgs* pArgs) { diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 51c87fa8d678e3..dae72f3e3a5a99 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -14568,6 +14568,11 @@ bool CEEInfo::convertPInvokeCalliToCall(CORINFO_RESOLVED_TOKEN * pResolvedToken, return false; } +void CEEInfo::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) +{ + // No update necessary, all entry points are tail callable in runtime. +} + void CEEInfo::allocMem (AllocMemArgs *pArgs) { LIMITED_METHOD_CONTRACT; From 7f21f31931c0f5e041f588dccbc2e8dead1ff42a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 1 Aug 2021 18:11:38 +0200 Subject: [PATCH 07/22] Minor changes and fix build break --- src/coreclr/inc/jiteeversionguid.h | 1 - src/coreclr/jit/codegenarmarch.cpp | 5 ++++- src/coreclr/jit/codegenxarch.cpp | 5 +---- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/lowerxarch.cpp | 7 ------- src/coreclr/jit/morph.cpp | 10 +++++----- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 4 ++-- 7 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 35e0fa641eaabb..a7763f7835b276 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -44,7 +44,6 @@ typedef const GUID *LPCGUID; #endif // !GUID_DEFINED constexpr GUID JITEEVersionIdentifier = { /* 7374274c-5cb5-4c41-872e-01f438ac1548 */ - 0x7374274c, 0x5cb5, 0x4c41, diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index c67f23c6232e22..37434a2d3f9789 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -2364,12 +2364,15 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // Use IP0 on ARM64 and R12 on ARM32 as the call target register. inst_Mov(TYP_I_IMPL, REG_FASTTAILCALL_TARGET, target->GetRegNum(), /* canSkip */ true); } +#ifdef FEATURE_READYTORUN_COMPILER else if (call->IsR2RRelativeIndir()) { assert(call->gtEntryPoint.accessType == IAT_PVALUE); assert(call->gtControlExpr == nullptr); - GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET, REG_R2R_INDIRECT_PARAM); + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET, + REG_R2R_INDIRECT_PARAM); } +#endif return; } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 88698a7f4c4223..7990fc565e7fb5 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5193,10 +5193,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // rip-relative jump. // Note that the indirection cell is required to be in RAX for R2R delay load helper // that expects to find it there (see ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell - // in crossgen2). We handle this by marking the target as contained and then emitting - // mov rax, cell - // jmp [rax] - // in codegen. + // in crossgen2). 
if (target != nullptr && !target->isContained()) { genConsumeReg(target); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 00801ef75c6d07..a2896961fbc64f 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3714,7 +3714,7 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call) #elif defined(TARGET_XARCH) // For xarch we usually get the indirection cell from the return address, // except for fast tailcalls where we do the same as ARM. - hasIndirectionCell = call->IsR2RRelativeIndir() && call->IsFastTailCall(); + hasIndirectionCell = call->IsR2RRelativeIndir() && call->IsFastTailCall(); #endif if (!hasIndirectionCell) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8fb99594b3c41d..d0f5e44eef2ec6 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4495,13 +4495,6 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call) canContainIndir = ctrlExpr->AsIndir()->HasBase() && ctrlExpr->AsIndir()->Base()->isContainedIntOrIImmed() && !ctrlExpr->AsIndir()->HasIndex(); - - // For R2R we cannot allow containment as the delay load helper - // expects the indirection cell to be left in a register. -#ifdef FEATURE_READYTORUN_COMPILER - if (call->gtEntryPoint.addr != nullptr) - canContainIndir = false; -#endif } if (canContainIndir) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index dd6c851c66548b..11bf98c18e4c9d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3227,7 +3227,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); @@ -7689,10 +7689,10 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif - // For R2R we might need a different entry point for this call if we are doing a tailcall. - // The reason is that the normal delay load helper uses the return address to find the indirection - // cell in x64, but now the JIT is expected to leave the indirection cell in rax. - // We optimize delegate invocations manually in the JIT so skip this for those. +// For R2R we might need a different entry point for this call if we are doing a tailcall. +// The reason is that the normal delay load helper uses the return address to find the indirection +// cell in x64, but now the JIT is expected to leave the indirection cell in rax. +// We optimize delegate invocations manually in the JIT so skip this for those. 
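// Roughly, updateEntryPointForTailCall lets crossgen2 swap the entry point for one created with
// isJumpableImportRequired, whose import thunk (Kind.DelayLoadHelperWithExistingIndirectionCell)
// reads the indirection cell from rax instead of deriving it from the return address.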
#ifdef FEATURE_READYTORUN_COMPILER if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke()) { diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index b11bb77a021175..7ffce9b89662a2 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -2643,8 +2643,8 @@ private void updateEntryPointForTailCall(ref CORINFO_CONST_LOOKUP entryPoint) _compilation.NodeFactory.MethodEntrypoint( imp.MethodWithToken, ((MethodFixupSignature)imp.ImportSignature.Target).IsInstantiatingStub, - false, - true); + isPrecodeImportRequired: false, + isJumpableImportRequired: true); entryPoint = CreateConstLookupToSymbol(newEntryPoint); } From 5ce1e65a070532e5c88d46f007f8298fc1ecdbb6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 1 Aug 2021 18:25:19 +0200 Subject: [PATCH 08/22] Switch to a define for tailcall register --- src/coreclr/jit/codegencommon.cpp | 10 ++++++---- src/coreclr/jit/codegenxarch.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 6 +++--- src/coreclr/jit/morph.cpp | 4 ++-- src/coreclr/jit/targetamd64.h | 4 ++++ 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 67d277e84840f3..b82ce4dd1ab95d 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8470,6 +8470,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) // clang-format off if (call->IsR2RRelativeIndir()) { + // Indirection cell is in REG_FASTTAILCALL_TARGET, so just emit + // jmp [reg] GetEmitter()->emitIns_Call( emitter::EC_INDIR_ARD, call->gtCallMethHnd, @@ -8481,7 +8483,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, - BAD_IL_OFFSET, REG_RAX, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ + BAD_IL_OFFSET, REG_FASTTAILCALL_TARGET, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */ true /* isJump */ ); } @@ -8538,9 +8540,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) else { // Target requires indirection to obtain. genCallInstruction will have materialized - // it into RAX already, so just jump to it. The stack walker requires that a register - // indirect tail call be rex.w prefixed. - GetEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX); + // it into REG_FASTTAILCALL_TARGET already, so just jump to it. The stack walker requires that a + // register indirect tail call be rex.w prefixed. + GetEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7990fc565e7fb5..3481da6c0c4152 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5191,13 +5191,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // If this is indirect then we go through RAX with epilog sequence // generating "jmp rax". Otherwise epilog will try to generate a // rip-relative jump. - // Note that the indirection cell is required to be in RAX for R2R delay load helper + // Note that the indirection cell is required to be in REG_FASTTAILCALL_TARGET for R2R delay load helper // that expects to find it there (see ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell // in crossgen2). 
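    // A rough sketch of the resulting R2R fast tailcall sequence on x64 (not the verbatim encoding):
    //   mov  rax, <indirection cell>      ; cell address passed as a non-standard arg in REG_FASTTAILCALL_TARGET
    //   ...epilog...
    //   rex-prefixed jmp qword ptr [rax]  ; emitted via EC_INDIR_ARD in genFnEpilog so the unwinder
    //                                     ; recognizes the jump as part of the epilog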
if (target != nullptr && !target->isContained()) { genConsumeReg(target); - genCopyRegIfNeeded(target, REG_RAX); + genCopyRegIfNeeded(target, REG_FASTTAILCALL_TARGET); } return; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 9d02e398ad7698..1718a959d3f198 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1216,9 +1216,9 @@ int LinearScan::BuildCall(GenTreeCall* call) // computed into a register. if (call->IsFastTailCall() && !ctrlExpr->isContained()) { - // Fast tail call - make sure that call target is always computed in RAX - // so that epilog sequence can generate "jmp rax" to achieve fast tail call. - ctrlExprCandidates = RBM_RAX; + // Fast tail call - make sure that call target is always computed in REG_FASTTAILCALL_TARGET + // so that epilog sequence can generate "jmp reg" to achieve fast tail call. + ctrlExprCandidates = RBM_FASTTAILCALL_TARGET; } #ifdef TARGET_X86 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 11bf98c18e4c9d..de559472e9e215 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2875,7 +2875,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #ifdef DEBUG indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; #endif - indirectCellAddress->SetRegNum(REG_RAX); + indirectCellAddress->SetRegNum(REG_FASTTAILCALL_TARGET); // Push the stub address onto the list of arguments. call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); @@ -7691,7 +7691,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // For R2R we might need a different entry point for this call if we are doing a tailcall. // The reason is that the normal delay load helper uses the return address to find the indirection -// cell in x64, but now the JIT is expected to leave the indirection cell in rax. +// cell in x64, but now the JIT is expected to leave the indirection cell in REG_FASTTAILCALL_TARGET. // We optimize delegate invocations manually in the JIT so skip this for those. #ifdef FEATURE_READYTORUN_COMPILER if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke()) diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index a3d41541e02db1..6f177051bf9454 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -222,6 +222,10 @@ #define REG_DEFAULT_HELPER_CALL_TARGET REG_RAX #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_RAX + #define REG_FASTTAILCALL_TARGET REG_RAX // Target register for fast tail call/indirection cell for R2R fast tailcall + // See ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell in crossgen2. 
+ #define RBM_FASTTAILCALL_TARGET RBM_RAX + // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R11 #define RBM_PINVOKE_COOKIE_PARAM RBM_R11 From 81268fa2726804ec1821ab3a6e02f559897c694b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 2 Aug 2021 12:04:04 +0200 Subject: [PATCH 09/22] Fix x86 --- src/coreclr/jit/targetx86.h | 4 ++++ .../JitInterface/CorInfoImpl.ReadyToRun.cs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 9f7d0fa7e992bf..98fd8fddc9a01a 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -188,6 +188,10 @@ #define REG_VIRTUAL_STUB_TARGET REG_EAX #define RBM_VIRTUAL_STUB_TARGET RBM_EAX + // Register where tailcall target is left, not currently used. + #define REG_FASTTAILCALL_TARGET REG_EAX + #define RBM_FASTTAILCALL_TARGET RBM_EAX + // Registers used by PInvoke frame setup #define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper #define RBM_PINVOKE_FRAME RBM_EDI diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 7ffce9b89662a2..09713df9044393 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -899,6 +899,8 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_GETREFANY: // For Vector256.Create and similar cases case CorInfoHelpFunc.CORINFO_HELP_THROW_NOT_IMPLEMENTED: + // For x86 tailcall where helper is required we need runtime JIT. 
+ case CorInfoHelpFunc.CORINFO_HELP_TAILCALL: throw new RequiresRuntimeJitException(ftnNum.ToString()); default: From 8e15c1c29ff8fadb1e39ecdd1435bbaf42762cd5 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 2 Aug 2021 17:33:17 +0200 Subject: [PATCH 10/22] Implement DefType.IsUnsafeValueType --- .../Compiler/VectorFieldLayoutAlgorithm.cs | 6 +++ .../tools/Common/JitInterface/CorInfoImpl.cs | 5 +-- .../TypeSystem/Common/DefType.FieldLayout.cs | 41 +++++++++++++++++++ .../TypeSystem/Common/FieldLayoutAlgorithm.cs | 5 +++ .../Common/MetadataFieldLayoutAlgorithm.cs | 27 ++++++++++++ .../Common/UniversalCanonLayoutAlgorithm.cs | 5 +++ .../RuntimeDeterminedFieldLayoutAlgorithm.cs | 8 ++++ .../Compiler/ReadyToRunCompilerContext.cs | 5 +++ .../SystemObjectFieldLayoutAlgorithm.cs | 5 +++ 9 files changed, 104 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs index 95bbf3559b2ba2..a4e49bde5fec5f 100644 --- a/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs @@ -90,6 +90,12 @@ public override bool ComputeContainsGCPointers(DefType type) return false; } + public override bool ComputeIsUnsafeValueType(DefType type) + { + Debug.Assert(!_fallbackAlgorithm.ComputeIsUnsafeValueType(type)); + return false; + } + public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristics(DefType type) { if (type.Context.Target.Architecture == TargetArchitecture.ARM64 && diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 02549319752d1d..1530ddbf932139 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -1886,9 +1886,8 @@ private uint getClassAttribsInternal(TypeDesc type) if (metadataType.IsExplicitLayout || (metadataType.IsSequentialLayout && metadataType.GetClassLayout().Size != 0) || metadataType.IsWellKnownType(WellKnownType.TypedReference)) result |= CorInfoFlag.CORINFO_FLG_CUSTOMLAYOUT; - // TODO - // if (type.IsUnsafeValueType) - // result |= CorInfoFlag.CORINFO_FLG_UNSAFE_VALUECLASS; + if (metadataType.IsUnsafeValueType) + result |= CorInfoFlag.CORINFO_FLG_UNSAFE_VALUECLASS; } if (type.IsCanonicalSubtype(CanonicalFormKind.Any)) diff --git a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs index 3e73dcfd821171..71c7423b8fbb04 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs @@ -53,6 +53,16 @@ private static class FieldLayoutFlags /// True if the layout of the type is not stable for use in the ABI /// public const int ComputedInstanceLayoutAbiUnstable = 0x80; + + /// + /// True if IsUnsafeValueType has been computed + /// + public const int ComputedIsUnsafeValueType = 0x100; + + /// + /// True if type transitively has UnsafeValueTypeAttribute + /// + public const int IsUnsafeValueType = 0x200; } private class StaticBlockInfo @@ -90,6 +100,22 @@ public bool ContainsGCPointers } } + /// + /// Does a type transitively have any fields which are marked with System.Runtime.CompilerServices.UnsafeValueTypeAttribute + /// + public bool IsUnsafeValueType + { + get + { + if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedIsUnsafeValueType)) + { + 
ComputeIsUnsafeValueType(); + } + return _fieldLayoutFlags.HasFlags(FieldLayoutFlags.IsUnsafeValueType); + } + } + + /// /// The number of bytes required to hold a field of this type /// @@ -449,5 +475,20 @@ public void ComputeTypeContainsGCPointers() _fieldLayoutFlags.AddFlags(flagsToAdd); } + + public void ComputeIsUnsafeValueType() + { + if (_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedIsUnsafeValueType)) + return; + + int flagsToAdd = FieldLayoutFlags.ComputedIsUnsafeValueType; + + if (this.Context.GetLayoutAlgorithmForType(this).ComputeIsUnsafeValueType(this)) + { + flagsToAdd |= FieldLayoutFlags.IsUnsafeValueType; + } + + _fieldLayoutFlags.AddFlags(flagsToAdd); + } } } diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs index 47c86385c15bff..863e857a21def8 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs @@ -31,6 +31,11 @@ public abstract class FieldLayoutAlgorithm /// public abstract bool ComputeContainsGCPointers(DefType type); + /// + /// Compute whether the specified type is a value type that transitively has UnsafeValueTypeAttribute + /// + public abstract bool ComputeIsUnsafeValueType(DefType type); + /// /// Compute the shape of a value type. The shape information is used to control code generation and allocation /// (such as vectorization, passing the value type by value across method calls, or boxing alignment). diff --git a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs index 4109bb701aa9ac..5ba8244d5eefdc 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs @@ -1014,6 +1014,33 @@ private ValueTypeShapeCharacteristics ComputeHomogeneousAggregateCharacteristic( return NotHA; } + public override bool ComputeIsUnsafeValueType(DefType type) + { + if (!type.IsValueType) + return false; + + MetadataType metadataType = (MetadataType)type; + if (metadataType.HasCustomAttribute("System.Runtime.CompilerServices", "UnsafeValueTypeAttribute")) + return true; + + foreach (FieldDesc field in metadataType.GetFields()) + { + if (field.IsStatic || field.IsLiteral) + continue; + + if (!(field.FieldType is DefType fieldType)) + continue; + + if (fieldType.IsPrimitive) + continue; + + if (fieldType.IsUnsafeValueType) + return true; + } + + return false; + } + private struct SizeAndAlignment { public LayoutInt Size; diff --git a/src/coreclr/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs index 9b58069b5758f3..196aef8c7e41c6 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs @@ -34,6 +34,11 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, }; } + public override bool ComputeIsUnsafeValueType(DefType type) + { + throw new NotSupportedException(); + } + public override ComputedStaticFieldLayout ComputeStaticFieldLayout(DefType type, StaticLayoutKind layoutKind) { return new ComputedStaticFieldLayout() diff --git a/src/coreclr/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs 
b/src/coreclr/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs index 59857683b10cb9..2d26f451dcf0c6 100644 --- a/src/coreclr/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs @@ -58,5 +58,13 @@ public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristi return canonicalType.ValueTypeShapeCharacteristics; } + + public override bool ComputeIsUnsafeValueType(DefType type) + { + RuntimeDeterminedType runtimeDeterminedType = (RuntimeDeterminedType)type; + DefType canonicalType = runtimeDeterminedType.CanonicalType; + + return canonicalType.IsUnsafeValueType; + } } } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs index 57905165c4862a..44bddc17c8b639 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs @@ -154,6 +154,11 @@ public override bool ComputeContainsGCPointers(DefType type) return false; } + public override bool ComputeIsUnsafeValueType(DefType type) + { + return false; + } + public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, InstanceLayoutKind layoutKind) { DefType similarSpecifiedVector = GetSimilarVector(type); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs index 2d04a02448a7b9..a0d68d124e89dc 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs @@ -50,6 +50,11 @@ public override bool ComputeContainsGCPointers(DefType type) return false; } + public override bool ComputeIsUnsafeValueType(DefType type) + { + return false; + } + public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristics(DefType type) { return _fallbackAlgorithm.ComputeValueTypeShapeCharacteristics(type); From b971fceb8c6b946ef9483d0348803f27001a82b7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 2 Aug 2021 23:20:17 +0200 Subject: [PATCH 11/22] Emit rex.jmp for tailcall jumps on x64 --- src/coreclr/jit/codegencommon.cpp | 3 +-- src/coreclr/jit/emitxarch.cpp | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b82ce4dd1ab95d..d2fabe9d5f3665 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -8470,8 +8470,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) // clang-format off if (call->IsR2RRelativeIndir()) { - // Indirection cell is in REG_FASTTAILCALL_TARGET, so just emit - // jmp [reg] + // Indirection cell is in REG_FASTTAILCALL_TARGET. 
GetEmitter()->emitIns_Call( emitter::EC_INDIR_ARD, call->gtCallMethHnd, diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index dc55403d6dcf30..bcc037c26718a6 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -7522,9 +7522,20 @@ void emitter::emitIns_Call(EmitCallType callType, { ins = INS_l_jmp; } + else if (callType == EC_FUNC_TOKEN_INDIR) + { + ins = INS_i_jmp; + } else { + assert(callType == EC_INDIR_ARD); +#ifdef TARGET_AMD64 + // The unwinder needs rex prefix to recognize that this jump is part of the epilog. + // See excepamd64.cpp/RtlpxVirtualUnwind. + ins = INS_rex_jmp; +#else ins = INS_i_jmp; +#endif } } id->idIns(ins); @@ -8891,7 +8902,12 @@ void emitter::emitDispIns( emitDispAddrMode(id, isNew); emitDispShift(ins); - if ((ins == INS_call) || (ins == INS_i_jmp)) + bool isCallInst; + isCallInst = (ins == INS_call) || (ins == INS_i_jmp); +#ifdef TARGET_AMD64 + isCallInst = isCallInst || (ins == INS_rex_jmp); +#endif + if (isCallInst) { assert(id->idInsFmt() == IF_ARD); From 483b34a9a1441be0d6f8136a94a72935b051799d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 3 Aug 2021 01:24:44 +0200 Subject: [PATCH 12/22] Refactor non standard args + refix recursive tailcall opt --- src/coreclr/jit/compiler.h | 41 ++++++++---- src/coreclr/jit/morph.cpp | 126 +++++++++++++++++++++++++------------ 2 files changed, 117 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e3e4bf89e61d2f..8e9fba7f0f751a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1463,6 +1463,20 @@ struct FuncInfoDsc // that isn't shared between the main function body and funclets. }; +enum class NonStandardArgKind +{ + None, + PInvokeFrame, + PInvokeTarget, + PInvokeCookie, + WrapperDelegateCell, + ShiftLow, + ShiftHigh, + RetBuffer, + VirtualStubCell, + R2RIndirectionCell, +}; + struct fgArgTabEntry { GenTreeCall::Use* use; // Points to the argument's GenTreeCall::Use in gtCallArgs or gtCallThisArg. @@ -1523,17 +1537,17 @@ struct fgArgTabEntry // struct is passed as a scalar type, this is that type. // Note that if a struct is passed by reference, this will still be the struct type. - bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar - bool needPlace : 1; // True when we must replace this argument with a placeholder node - bool isTmp : 1; // True when we setup a temp LclVar for this argument due to size issues with the struct - bool processed : 1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs - bool isBackFilled : 1; // True when the argument fills a register slot skipped due to alignment requirements of - // previous arguments. - bool isNonStandard : 1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced - // to be on the stack despite its arg list position. - bool isStruct : 1; // True if this is a struct arg - bool _isVararg : 1; // True if the argument is in a vararg context. - bool passedByRef : 1; // True iff the argument is passed by reference. 
+ bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar + bool needPlace : 1; // True when we must replace this argument with a placeholder node + bool isTmp : 1; // True when we setup a temp LclVar for this argument due to size issues with the struct + bool processed : 1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs + bool isBackFilled : 1; // True when the argument fills a register slot skipped due to alignment requirements of + // previous arguments. + NonStandardArgKind nonStandardArgKind : 4; // The non-standard arg kind. Non-standard args are args that are forced + // to be in certain registers. + bool isStruct : 1; // True if this is a struct arg + bool _isVararg : 1; // True if the argument is in a vararg context. + bool passedByRef : 1; // True iff the argument is passed by reference. #ifdef FEATURE_ARG_SPLIT bool _isSplit : 1; // True when this argument is split between the registers and OutArg area #endif // FEATURE_ARG_SPLIT @@ -1559,6 +1573,11 @@ struct fgArgTabEntry #endif } + bool isNonStandard() const + { + return nonStandardArgKind != NonStandardArgKind::None; + } + bool isLateArg() const { bool isLate = (_lateArgInx != UINT_MAX); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index de559472e9e215..ef73f0f48f97f9 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -907,9 +907,43 @@ void fgArgTabEntry::Dump() const { printf(", isBackFilled"); } - if (isNonStandard) + if (nonStandardArgKind != NonStandardArgKind::None) { - printf(", isNonStandard"); + const char* kind; + switch (nonStandardArgKind) + { + case NonStandardArgKind::PInvokeFrame: + kind = "PInvokeFrame"; + break; + case NonStandardArgKind::PInvokeTarget: + kind = "PInvokeTarget"; + break; + case NonStandardArgKind::PInvokeCookie: + kind = "PInvokeCookie"; + break; + case NonStandardArgKind::WrapperDelegateCell: + kind = "WrapperDelegateCell"; + break; + case NonStandardArgKind::ShiftLow: + kind = "ShiftLow"; + break; + case NonStandardArgKind::ShiftHigh: + kind = "ShiftHigh"; + break; + case NonStandardArgKind::RetBuffer: + kind = "RetBuffer"; + break; + case NonStandardArgKind::VirtualStubCell: + kind = "VirtualStubCell"; + break; + case NonStandardArgKind::R2RIndirectionCell: + kind = "R2RIndirectionCell"; + break; + default: + kind = "Unknown"; + break; + } + printf(", nonStandard[%s]", kind); } if (isStruct) { @@ -1106,9 +1140,9 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, { curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); } - curArgTabEntry->isBackFilled = false; - curArgTabEntry->isNonStandard = false; - curArgTabEntry->isStruct = isStruct; + curArgTabEntry->isBackFilled = false; + curArgTabEntry->nonStandardArgKind = NonStandardArgKind::None; + curArgTabEntry->isStruct = isStruct; curArgTabEntry->SetIsVararg(isVararg); curArgTabEntry->SetByteAlignment(byteAlignment); curArgTabEntry->SetByteSize(byteSize, isStruct, isFloatHfa); @@ -1204,9 +1238,9 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, { curArgTabEntry->SetHfaElemKind(CORINFO_HFA_ELEM_NONE); } - curArgTabEntry->isBackFilled = false; - curArgTabEntry->isNonStandard = false; - curArgTabEntry->isStruct = isStruct; + curArgTabEntry->isBackFilled = false; + curArgTabEntry->nonStandardArgKind = NonStandardArgKind::None; + curArgTabEntry->isStruct = isStruct; curArgTabEntry->SetIsVararg(isVararg); curArgTabEntry->SetByteAlignment(byteAlignment); @@ -2562,9 +2596,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* 
call) { struct NonStandardArg { - regNumber reg; // The register to be assigned to this non-standard argument. - GenTree* node; // The tree node representing this non-standard argument. - // Note that this must be updated if the tree node changes due to morphing! + GenTree* node; // The tree node representing this non-standard argument. + // Note that this must be updated if the tree node changes due to morphing! + regNumber reg; // The register to be assigned to this non-standard argument. + NonStandardArgKind kind; // The kind of the nonstandard arg }; ArrayStack args; @@ -2584,9 +2619,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // Return Value: // None. // - void Add(GenTree* node, regNumber reg) + void Add(GenTree* node, regNumber reg, NonStandardArgKind kind) { - NonStandardArg nsa = {reg, node}; + NonStandardArg nsa = {node, reg, kind}; args.Push(nsa); } @@ -2626,7 +2661,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // register to use. // 'false' otherwise (in this case, *pReg is unmodified). // - bool FindReg(GenTree* node, regNumber* pReg) + bool Find(GenTree* node, regNumber* pReg, NonStandardArgKind* kind) { for (int i = 0; i < args.Height(); i++) { @@ -2634,6 +2669,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (node == nsa.node) { *pReg = nsa.reg; + *kind = nsa.kind; return true; } } @@ -2691,7 +2727,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) GenTreeCall::Use* args = call->gtCallArgs; GenTree* arg1 = args->GetNode(); assert(arg1 != nullptr); - nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME); + nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME, NonStandardArgKind::PInvokeFrame); } #endif // defined(TARGET_X86) || defined(TARGET_ARM) #if defined(TARGET_ARM) @@ -2728,7 +2764,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) *insertionPoint = gtNewCallArgs(newArg); numArgs++; - nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg()); + nonStandardArgs.Add(newArg, virtualStubParamInfo->GetReg(), NonStandardArgKind::WrapperDelegateCell); } #endif // defined(TARGET_ARM) #if defined(TARGET_X86) @@ -2740,12 +2776,12 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) GenTreeCall::Use* args = call->gtCallArgs; GenTree* arg1 = args->GetNode(); assert(arg1 != nullptr); - nonStandardArgs.Add(arg1, REG_LNGARG_LO); + nonStandardArgs.Add(arg1, REG_LNGARG_LO, NonStandardArgKind::ShiftLow); args = args->GetNext(); GenTree* arg2 = args->GetNode(); assert(arg2 != nullptr); - nonStandardArgs.Add(arg2, REG_LNGARG_HI); + nonStandardArgs.Add(arg2, REG_LNGARG_HI, NonStandardArgKind::ShiftHigh); } #else // !TARGET_X86 // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. @@ -2768,7 +2804,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // We don't increment numArgs here, since we already counted this argument above. 
- nonStandardArgs.Add(argx, theFixedRetBuffReg()); + nonStandardArgs.Add(argx, theFixedRetBuffReg(), NonStandardArgKind::RetBuffer); } // We are allowed to have a Fixed Return Buffer argument combined @@ -2785,7 +2821,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallArgs = gtPrependNewCallArg(stubAddrArg, call->gtCallArgs); numArgs++; - nonStandardArgs.Add(stubAddrArg, stubAddrArg->GetRegNum()); + nonStandardArgs.Add(stubAddrArg, stubAddrArg->GetRegNum(), NonStandardArgKind::VirtualStubCell); } else { @@ -2817,7 +2853,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); #endif // defined(TARGET_X86) - nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); + nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM, NonStandardArgKind::PInvokeCookie); numArgs++; // put destination into R10/EAX @@ -2825,7 +2861,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallArgs = gtPrependNewCallArg(arg, call->gtCallArgs); numArgs++; - nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM); + nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM, NonStandardArgKind::PInvokeTarget); // finally change this call to a helper call call->gtCallType = CT_HELPER; @@ -2857,7 +2893,8 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); numArgs++; - nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); + nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum(), + NonStandardArgKind::R2RIndirectionCell); } #elif defined(TARGET_XARCH) // For xarch fast tailcalls we do the same as ARM, leaving the indirection cell in the @@ -2881,7 +2918,8 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); numArgs++; - nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum()); + nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum(), + NonStandardArgKind::R2RIndirectionCell); } #endif @@ -3083,11 +3121,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #if !defined(OSX_ARM64_ABI) unsigned argAlignBytes = TARGET_POINTER_SIZE; #endif - unsigned size = 0; - unsigned byteSize = 0; - bool isRegArg = false; - bool isNonStandard = false; - regNumber nonStdRegNum = REG_NA; + unsigned size = 0; + unsigned byteSize = 0; + bool isRegArg = false; + NonStandardArgKind nonStandardArgKind = NonStandardArgKind::None; + regNumber nonStdRegNum = REG_NA; if (GlobalJitOptions::compFeatureHfa) { @@ -3483,10 +3521,11 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // // They should not affect the placement of any other args or stack space required. // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); + bool isNonStandard = nonStandardArgs.Find(argx, &nonStdRegNum, &nonStandardArgKind); if (isNonStandard) { - isRegArg = (nonStdRegNum != REG_STK); + assert(nonStdRegNum != REG_STK); + isRegArg = true; } else if (call->IsTailCallViaJitHelper()) { @@ -3589,7 +3628,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); - newArgEntry->isNonStandard = isNonStandard; + newArgEntry->nonStandardArgKind = nonStandardArgKind; // Set up the next intArgRegNum and fltArgRegNum values. 
if (!isBackFilled) @@ -3818,7 +3857,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // DEBUG - if (argEntry->isNonStandard) + if (argEntry->isNonStandard()) { // We need to update the node field for this nonStandard arg here // as it may have been changed by the call to fgMorphTree. @@ -8894,15 +8933,22 @@ unsigned Compiler::fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEn unsigned argCount = argInfo->ArgCount(); fgArgTabEntry** argTable = argInfo->ArgTable(); - unsigned numNonStandardBefore = 0; + unsigned numToRemove = 0; for (unsigned i = 0; i < argCount; i++) { fgArgTabEntry* arg = argTable[i]; - if (arg->isNonStandard && (arg->argNum < argTabEntry->argNum)) - numNonStandardBefore++; + if (!arg->isNonStandard()) + continue; + + // Ret buffer is added as a local, so don't subtract one for it + if (arg->nonStandardArgKind == NonStandardArgKind::RetBuffer) + continue; + + if (arg->argNum < argTabEntry->argNum) + numToRemove++; } - return argTabEntry->argNum - numNonStandardBefore; + return argTabEntry->argNum - numToRemove; } //------------------------------------------------------------------------------ @@ -8996,7 +9042,9 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa { // This is an actual argument that needs to be assigned to the corresponding caller parameter. fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); - if (!curArgTabEntry->isNonStandard) + // Ret buffer is part of locals so assign it + if (!curArgTabEntry->isNonStandard() || + (curArgTabEntry->nonStandardArgKind == NonStandardArgKind::RetBuffer)) { Statement* paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, @@ -9022,7 +9070,7 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. GenTree* lateArg = use.GetNode(); fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); - if (!curArgTabEntry->isNonStandard) + if (!curArgTabEntry->isNonStandard() || curArgTabEntry->nonStandardArgKind == NonStandardArgKind::RetBuffer) { Statement* paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, From b533f4b5d04a879ef3b323f4ef70c8fc2d0d4cbf Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 4 Aug 2021 23:26:33 +0200 Subject: [PATCH 13/22] Set nonStandardArgKind for stack args also --- src/coreclr/jit/morph.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ef73f0f48f97f9..f1d12247f65f3f 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -3524,8 +3524,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) bool isNonStandard = nonStandardArgs.Find(argx, &nonStdRegNum, &nonStandardArgKind); if (isNonStandard) { - assert(nonStdRegNum != REG_STK); - isRegArg = true; + isRegArg = (nonStdRegNum != REG_STK); } else if (call->IsTailCallViaJitHelper()) { @@ -3628,7 +3627,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); - newArgEntry->nonStandardArgKind = nonStandardArgKind; // Set up the next intArgRegNum and fltArgRegNum values. 
if (!isBackFilled) @@ -3697,6 +3695,8 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #endif } + newArgEntry->nonStandardArgKind = nonStandardArgKind; + if (GlobalJitOptions::compFeatureHfa) { if (isHfaArg) @@ -3857,7 +3857,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } #endif // DEBUG - if (argEntry->isNonStandard()) + if (argEntry->isNonStandard() && argEntry->isPassedInRegisters()) { // We need to update the node field for this nonStandard arg here // as it may have been changed by the call to fgMorphTree. From 424d5384545da030729222824af1b6f70e5ed40f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 30 Aug 2021 16:48:15 +0200 Subject: [PATCH 14/22] Regenerate JIT interface --- src/coreclr/tools/Common/JitInterface/CorInfoBase.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs index 36c85113f7d8a4..5e5c89774b7ca2 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoBase.cs @@ -2581,7 +2581,7 @@ static byte _doesFieldBelongToClass(IntPtr thisHandle, IntPtr* ppException, CORI static IntPtr GetUnmanagedCallbacks() { - void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 173); + void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 174); callbacks[0] = (delegate* unmanaged)&_isJitIntrinsic; callbacks[1] = (delegate* unmanaged)&_getMethodAttribs; @@ -2756,6 +2756,7 @@ static IntPtr GetUnmanagedCallbacks() callbacks[170] = (delegate* unmanaged)&_getRelocTypeHint; callbacks[171] = (delegate* unmanaged)&_getExpectedTargetArchitecture; callbacks[172] = (delegate* unmanaged)&_getJitFlags; + callbacks[173] = (delegate* unmanaged)&_doesFieldBelongToClass; return (IntPtr)callbacks; } From 0968851cc3424fe51814f71b8cb48ea475da91b9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 4 Sep 2021 17:41:07 +0200 Subject: [PATCH 15/22] Improve recursion-to-loop decision about which args need to be reassigned --- src/coreclr/jit/morph.cpp | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 025c97437c3f7a..f6a17ef678c032 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -7738,18 +7738,20 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } #endif -// For R2R we might need a different entry point for this call if we are doing a tailcall. -// The reason is that the normal delay load helper uses the return address to find the indirection -// cell in x64, but now the JIT is expected to leave the indirection cell in REG_FASTTAILCALL_TARGET. -// We optimize delegate invocations manually in the JIT so skip this for those. + // For R2R we might need a different entry point for this call if we are doing a tailcall. + // The reason is that the normal delay load helper uses the return address to find the indirection + // cell in x64, but now the JIT is expected to leave the indirection cell in REG_FASTTAILCALL_TARGET. + // We optimize delegate invocations manually in the JIT so skip this for those. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef FEATURE_READYTORUN_COMPILER if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke()) { info.compCompHnd->updateEntryPointForTailCall(&call->gtEntryPoint); #ifdef TARGET_XARCH - // We need to redo arg info to add the indirection cell arg. 
- call->fgArgInfo = nullptr; + // We have already computed arg info to make the fast tailcall decision, but on X64 we now + // have to pass the indirection cell, so redo arg info. + call->ResetArgInfo(); #endif } #endif @@ -8935,11 +8937,8 @@ unsigned Compiler::fgGetArgTabEntryParameterLclNum(GenTreeCall* call, fgArgTabEn for (unsigned i = 0; i < argCount; i++) { fgArgTabEntry* arg = argTable[i]; - if (!arg->isNonStandard()) - continue; - - // Ret buffer is added as a local, so don't subtract one for it - if (arg->nonStandardArgKind == NonStandardArgKind::RetBuffer) + // Late added args add extra args that do not map to IL parameters and that we should not reassign. + if (!arg->isNonStandard() || !arg->isNonStandardArgAddedLate()) continue; if (arg->argNum < argTabEntry->argNum) @@ -9040,9 +9039,8 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa { // This is an actual argument that needs to be assigned to the corresponding caller parameter. fgArgTabEntry* curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex); - // Ret buffer is part of locals so assign it - if (!curArgTabEntry->isNonStandard() || - (curArgTabEntry->nonStandardArgKind == NonStandardArgKind::RetBuffer)) + // Late-added non-standard args are extra args that are not passed as locals, so skip those + if (!curArgTabEntry->isNonStandard() || !curArgTabEntry->isNonStandardArgAddedLate()) { Statement* paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, @@ -9068,7 +9066,8 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter. GenTree* lateArg = use.GetNode(); fgArgTabEntry* curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex); - if (!curArgTabEntry->isNonStandard() || curArgTabEntry->nonStandardArgKind == NonStandardArgKind::RetBuffer) + // Late-added non-standard args are extra args that are not passed as locals, so skip those + if (!curArgTabEntry->isNonStandard() || !curArgTabEntry->isNonStandardArgAddedLate()) { Statement* paramAssignStmt = fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, From b345db792a3fc5ea6556cd3dee31e2c25bef6b35 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Sep 2021 16:18:09 +0200 Subject: [PATCH 16/22] Use INS_tail_i_jmp for func token indir --- src/coreclr/jit/emitxarch.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index d9da20060f7b33..ba49170b557d3d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -7538,10 +7538,6 @@ void emitter::emitIns_Call(EmitCallType callType, { ins = INS_l_jmp; } - else if (callType == EC_FUNC_TOKEN_INDIR) - { - ins = INS_i_jmp; - } else { ins = INS_tail_i_jmp; From 0816d52957a93a6ddbd4e5c880371c322b7cf1a2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Sep 2021 18:30:45 +0200 Subject: [PATCH 17/22] More efficient arm64 VSD fast tailcalls, fix some bad merging Also fix logic since we removed tailcall register. For ARM64 that means we now use the single temp reg that is available for R2R/VSD calls, and in LSRA we ensure this goes into a volatile register. 
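As a rough sketch of the intended arm64 sequence for an R2R/VSD fast tailcall
(register names are illustrative only; the actual temp is whatever volatile
register LSRA picks):

    // while processing the GT_CALL node, before the epilog runs:
    //   ldr <tmp>, [<indirection cell reg>]    ; load the real call target
    // when genCallInstruction emits the jump in the epilog:
    //   br  <tmp>                              ; frame has already been torn down

The temp has to survive the epilog, which is why codegen asserts it is a
volatile register other than LR.
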
--- src/coreclr/jit/codegenarmarch.cpp | 35 +++++++++++++++------ src/coreclr/jit/codegenxarch.cpp | 46 ++++++++++++++++++++------- src/coreclr/jit/lower.cpp | 5 +-- src/coreclr/jit/lsraarmarch.cpp | 14 +++++++-- src/coreclr/jit/morph.cpp | 50 +++++++++--------------------- src/coreclr/jit/targetamd64.h | 6 ++-- src/coreclr/jit/targetx86.h | 7 ++--- 7 files changed, 94 insertions(+), 69 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 22fcb4f1869e13..95ca1bb8e63bf9 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -2351,13 +2351,22 @@ void CodeGen::genCall(GenTreeCall* call) // Indirect fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. genConsumeReg(target); } -#ifdef FEATURE_READYTORUN_COMPILER - else if (call->IsR2RRelativeIndir()) +#ifdef FEATURE_READYTORUN + else if (call->IsR2ROrVirtualStubRelativeIndir()) { - assert(call->gtEntryPoint.accessType == IAT_PVALUE); + assert(((call->IsR2RRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_PVALUE)) || + ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); assert(call->gtControlExpr == nullptr); - GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), REG_FASTTAILCALL_TARGET, - REG_R2R_INDIRECT_PARAM); + + regNumber tmpReg = call->GetSingleTempReg(); + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_LR)) == tmpReg); + + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // We will use this again when emitting the jump in genCallInstruction in the epilog + call->gtRsvdRegs |= genRegMask(tmpReg); } #endif @@ -2566,12 +2575,20 @@ void CodeGen::genCallInstruction(GenTreeCall* call) ((call->IsVirtualStubRelativeIndir()) && (call->gtEntryPoint.accessType == IAT_VALUE))); #endif // FEATURE_READYTORUN assert(call->gtControlExpr == nullptr); - assert(!call->IsTailCall()); regNumber tmpReg = call->GetSingleTempReg(); - regNumber callAddrReg = - call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; - GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + // For fast tailcalls we have already loaded the call target when processing the call node. + if (!call->IsFastTailCall()) + { + regNumber callAddrReg = + call->IsVirtualStubRelativeIndir() ? compiler->virtualStubParamInfo->GetReg() : REG_R2R_INDIRECT_PARAM; + GetEmitter()->emitIns_R_R(ins_Load(TYP_I_IMPL), emitActualTypeSize(TYP_I_IMPL), tmpReg, callAddrReg); + } + else + { + // Register where we save call address in should not be overridden by epilog. + assert((tmpReg & (RBM_INT_CALLEE_TRASH & ~RBM_LR)) == tmpReg); + } // We have now generated code for gtControlExpr evaluating it into `tmpReg`. // We just need to emit "call tmpReg" in this case. diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 54c41b3d5ba13a..6475c990feb494 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5701,18 +5701,40 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA { emitter::EmitCallType type = (call->gtEntryPoint.accessType == IAT_VALUE) ? 
emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR; - // clang-format off - genEmitCall(type, - methHnd, - INDEBUG_LDISASM_COMMA(sigInfo) - (void*)call->gtEntryPoint.addr - X86_ARG(argSizeForEmitter), - retSize - MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - ilOffset, - REG_NA, - call->IsFastTailCall()); - // clang-format on + if (call->IsFastTailCall() && (type == emitter::EC_FUNC_TOKEN_INDIR)) + { + // For fast tailcall with func token indir we already have the indirection cell in REG_R2R_INDIRECT_PARAM, + // so get it from there. + // clang-format off + GetEmitter()->emitIns_Call( + emitter::EC_INDIR_ARD, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + nullptr, + 0, + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, + ilOffset, REG_R2R_INDIRECT_PARAM, REG_NA, 0, 0, true); + // clang-format on + } + else + { + // clang-format off + genEmitCall(type, + methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) + (void*)call->gtEntryPoint.addr + X86_ARG(argSizeForEmitter), + retSize + MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + ilOffset, + REG_NA, + call->IsFastTailCall()); + // clang-format on + } } #endif else diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 755ca4cdb6c8fd..4af52751989937 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4817,15 +4817,12 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) } else { - bool shouldOptimizeVirtualStubCall = false; #if defined(FEATURE_READYTORUN) && defined(TARGET_ARMARCH) // Skip inserting the indirection node to load the address that is already // computed in REG_R2R_INDIRECT_PARAM as a hidden parameter. Instead during the // codegen, just load the call target from REG_R2R_INDIRECT_PARAM. - // However, for tail calls, the call target is always computed in RBM_FASTTAILCALL_TARGET - // and so do not optimize virtual stub calls for such cases. - shouldOptimizeVirtualStubCall = !call->IsTailCall(); + shouldOptimizeVirtualStubCall = true; #endif // FEATURE_READYTORUN && TARGET_ARMARCH if (!shouldOptimizeVirtualStubCall) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 7cde2796b5a1d1..ef62330b0aefc0 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -178,12 +178,22 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = RBM_INT_CALLEE_TRASH; + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(ctrlExprCandidates != RBM_NONE); } } else if (call->IsR2ROrVirtualStubRelativeIndir()) { - buildInternalIntRegisterDefForNode(call); + // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM + // and will load call address into the temp register from this register. 
+ regMaskTP candidates = RBM_NONE; + if (call->IsFastTailCall()) + { + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + assert(candidates != RBM_NONE); + } + + buildInternalIntRegisterDefForNode(call, candidates); } #ifdef TARGET_ARM else diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4919cf58d0b2ca..dbe66697df9c8b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2828,10 +2828,20 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); } #if defined(FEATURE_READYTORUN) -#if defined(TARGET_ARMARCH) - // For arm, we dispatch code same as VSD using virtualStubParamInfo->GetReg() + // For arm/arm64, we dispatch code same as VSD using virtualStubParamInfo->GetReg() // for indirection cell address, which ZapIndirectHelperThunk expects. - if (call->IsR2RRelativeIndir() && !call->IsDelegateInvoke()) + // For x64/x86 we use return address to get the indirection cell by disassembling the call site. + // That is not possible for fast tailcalls, so we only need this logic for fast tailcalls on xarch. + // Note that we call this before we know if something will be a fast tailcall or not. + // That's ok; after making something a tailcall, we will invalidate this information + // and reconstruct it if necessary. The tailcalling decision does not change since + // this is a non-standard arg in a register. + bool needsIndirectionCell = call->IsR2RRelativeIndir() && !call->IsDelegateInvoke(); +#if defined(TARGET_XARCH) + needsIndirectionCell &= call->IsFastTailCall(); +#endif + + if (needsIndirectionCell) { assert(call->gtEntryPoint.addr != nullptr); @@ -2856,33 +2866,6 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum(), NonStandardArgKind::R2RIndirectionCell); } -#elif defined(TARGET_XARCH) - // For xarch fast tailcalls we do the same as ARM, leaving the indirection cell in the - // fast tailcall register. - // Note that we call this before we know if something will be a fast tailcall or not. - // That's ok; after making something a tailcall, we will invalidate this information - // and reconstruct it if necessary. The tailcalling decision does not change since - // this is a non-standard arg in a register. - if (call->IsR2RRelativeIndir() && call->IsFastTailCall() && !call->IsDelegateInvoke()) - { - assert(call->gtEntryPoint.addr != nullptr); - - size_t addrValue = (size_t)call->gtEntryPoint.addr; - GenTree* indirectCellAddress = gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR); -#ifdef DEBUG - indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; -#endif - indirectCellAddress->SetRegNum(REG_FASTTAILCALL_TARGET); - - // Push the stub address onto the list of arguments. 
- call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); - - numArgs++; - nonStandardArgs.Add(indirectCellAddress, indirectCellAddress->GetRegNum(), - NonStandardArgKind::R2RIndirectionCell); - } - -#endif #endif // Allocate the fgArgInfo for the call node; @@ -3224,7 +3207,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); } #else // !UNIX_AMD64_ABI - size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' + size = 1; // On AMD64 Windows, all args fit in a single (64-bit) 'slot' if (!isStructArg) { byteSize = genTypeSize(argx); @@ -7718,10 +7701,8 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // For R2R we might need a different entry point for this call if we are doing a tailcall. // The reason is that the normal delay load helper uses the return address to find the indirection - // cell in x64, but now the JIT is expected to leave the indirection cell in REG_FASTTAILCALL_TARGET. + // cell in xarch, but now the JIT is expected to leave the indirection cell in REG_R2R_INDIRECT_PARAM: // We optimize delegate invocations manually in the JIT so skip this for those. - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef FEATURE_READYTORUN_COMPILER if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke()) { info.compCompHnd->updateEntryPointForTailCall(&call->gtEntryPoint); @@ -7732,7 +7713,6 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) call->ResetArgInfo(); #endif } -#endif // If this block has a flow successor, make suitable updates. // diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 6f177051bf9454..e4ec28537ed716 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -222,9 +222,9 @@ #define REG_DEFAULT_HELPER_CALL_TARGET REG_RAX #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_RAX - #define REG_FASTTAILCALL_TARGET REG_RAX // Target register for fast tail call/indirection cell for R2R fast tailcall - // See ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell in crossgen2. - #define RBM_FASTTAILCALL_TARGET RBM_RAX + #define REG_R2R_INDIRECT_PARAM REG_RAX // Indirection cell for R2R fast tailcall + // See ImportThunk.Kind.DelayLoadHelperWithExistingIndirectionCell in crossgen2. + #define RBM_R2R_INDIRECT_PARAM RBM_RAX // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R11 diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 3bab96e389baec..776c9deece36ef 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -163,6 +163,9 @@ #define REG_JUMP_THUNK_PARAM REG_EAX #define RBM_JUMP_THUNK_PARAM RBM_EAX + #define REG_R2R_INDIRECT_PARAM REG_EAX // Indirection cell for R2R fast tailcall, not currently used in x86. + #define RBM_R2R_INDIRECT_PARAM RBM_EAX + #if NOGC_WRITE_BARRIERS #define REG_WRITE_BARRIER REG_EDX #define RBM_WRITE_BARRIER RBM_EDX @@ -188,10 +191,6 @@ #define REG_VIRTUAL_STUB_TARGET REG_EAX #define RBM_VIRTUAL_STUB_TARGET RBM_EAX - // Register where tailcall target is left, not currently used. 
- #define REG_FASTTAILCALL_TARGET REG_EAX - #define RBM_FASTTAILCALL_TARGET RBM_EAX - // Registers used by PInvoke frame setup #define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper #define RBM_PINVOKE_FRAME RBM_EDI From 863ad4d344bcdacebb00c8c921c7345fd63c7d7b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Sep 2021 20:46:31 +0200 Subject: [PATCH 18/22] Take R2R indirection into account for tail call profitability On x64 an R2R indirected direct call normally disassembles the call site to determine the indirection cell, so it is more expensive to do tail calls in this scenario as the indirection cell needs to be passed in a register. Take this into account: if there is no tail. prefix, do normal calls, and otherwise use tail calls. --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/jitconfigvalues.h | 1 - src/coreclr/jit/morph.cpp | 125 +++++++++++++----------------- 3 files changed, 55 insertions(+), 72 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6b5943685400b7..d6c472a7e9ca36 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6231,6 +6231,7 @@ class Compiler private: GenTree* fgMorphField(GenTree* tree, MorphAddrContext* mac); bool fgCanFastTailCall(GenTreeCall* call, const char** failReason); + bool fgShouldFastTailCall(GenTreeCall* call, const char** failReason); #if FEATURE_FASTTAILCALL bool fgCallHasMustCopyByrefParameter(GenTreeCall* callee); #endif diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index c86c600acb6563..0f8d3e5fd49f9e 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -122,7 +122,6 @@ CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables s CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) CONFIG_INTEGER(JitQueryCurrentStaticFieldClass, W("JitQueryCurrentStaticFieldClass"), 1) -CONFIG_INTEGER(JitReportFastTailCallDecisions, W("JitReportFastTailCallDecisions"), 0) CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0) CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1) CONFIG_METHODSET(JitPrintInlinedMethods, W("JitPrintInlinedMethods")) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index dbe66697df9c8b..1802f642bd4abe 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -6782,7 +6782,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // // Arguments: // callee - The callee to check -// failReason - If this method returns false, the reason why. Can be nullptr. +// failReason - If this method returns false, the reason why. 
// // Return Value: // Returns true or false based on whether the callee can be fastTailCalled @@ -6922,65 +6922,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } calleeArgStackSize = GetOutgoingArgByteSize(calleeArgStackSize); - auto reportFastTailCallDecision = [&](const char* thisFailReason) { - if (failReason != nullptr) - { - *failReason = thisFailReason; - } - -#ifdef DEBUG - if ((JitConfig.JitReportFastTailCallDecisions()) == 1) - { - if (callee->gtCallType != CT_INDIRECT) - { - const char* methodName; - - methodName = eeGetMethodFullName(callee->gtCallMethHnd); - - printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", - info.compFullName, methodName); - } - else - { - printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " - "Decision: ", - info.compFullName); - } - - if (thisFailReason == nullptr) - { - printf("Will fast tailcall"); - } - else - { - printf("Will not fast tailcall (%s)", thisFailReason); - } - - printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize); - } - else - { - if (thisFailReason == nullptr) - { - JITDUMP("[Fast tailcall decision]: Will fast tailcall\n"); - } - else - { - JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason); - } - } -#endif // DEBUG - }; - if (!opts.compFastTailCalls) { - reportFastTailCallDecision("Configuration doesn't allow fast tail calls"); + *failReason = "Configuration doesn't allow fast tail calls"; return false; } if (callee->IsStressTailCall()) { - reportFastTailCallDecision("Fast tail calls are not performed under tail call stress"); + *failReason = "Fast tail calls are not performed under tail call stress"; return false; } @@ -7000,14 +6950,14 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) #if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) if (info.compIsVarArgs || callee->IsVarargs()) { - reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64"); + *failReason = "Fast tail calls with varargs not supported on Windows ARM/ARM64"; return false; } #endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) if (compLocallocUsed) { - reportFastTailCallDecision("Localloc used"); + *failReason = "Localloc used"; return false; } @@ -7018,7 +6968,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // tail call. if (getNeedsGSSecurityCookie()) { - reportFastTailCallDecision("GS Security cookie check required"); + *failReason = "GS Security cookie check required"; return false; } #endif @@ -7026,7 +6976,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // If the NextCallReturnAddress intrinsic is used we should do normal calls. if (info.compHasNextCallRetAddr) { - reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic"); + *failReason = "Uses NextCallReturnAddress intrinsic"; return false; } @@ -7036,7 +6986,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // Otherwise go the slow route. 
if (info.compRetBuffArg == BAD_VAR_NUM) { - reportFastTailCallDecision("Callee has RetBuf but caller does not."); + *failReason = "Callee has RetBuf but caller does not."; return false; } } @@ -7048,7 +6998,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // as non-interruptible for fast tail calls. if (calleeArgStackSize > callerArgStackSize) { - reportFastTailCallDecision("Not enough incoming arg space"); + *failReason = "Not enough incoming arg space"; return false; } @@ -7057,19 +7007,46 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // as we need to keep our frame around. if (fgCallHasMustCopyByrefParameter(callee)) { - reportFastTailCallDecision("Callee has a byref parameter"); + *failReason = "Callee has a byref parameter"; return false; } - reportFastTailCallDecision(nullptr); + *failReason = nullptr; return true; #else // FEATURE_FASTTAILCALL - if (failReason) - *failReason = "Fast tailcalls are not supported on this platform"; + *failReason = "Fast tailcalls are not supported on this platform"; return false; #endif } +//------------------------------------------------------------------------ +// fgShouldFastTailCall: Do profitability checks for whether to do a fast tailcall. +// +// Arguments: +// call - The call to check +// failReason - If this method returns false, the reason why. Can be nullptr. +// +// Return Value: +// Returns true or false based on whether the callee can be fast tailcalled. +// +bool Compiler::fgShouldFastTailCall(GenTreeCall* call, const char** failReason) +{ +#ifdef TARGET_AMD64 + // Indirected R2R calls on x64 need indirection cell that is normally + // retrieved by disassembling caller instructions. If we tailcall, then we + // need to pass the indirection cell in a register which makes the call + // site larger. + if (call->IsR2RRelativeIndir()) + { + *failReason = "Tailcall with R2R indirection cell requires extra argument that increases call site size"; + return false; + } +#endif + + *failReason = nullptr; + return true; +} + //------------------------------------------------------------------------ // fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that // requires a struct copy in the caller. @@ -7531,19 +7508,25 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) const char* failReason = nullptr; bool canFastTailCall = fgCanFastTailCall(call, &failReason); + if (!canFastTailCall && call->IsImplicitTailCall()) + { + // Implicit or opportunistic tail calls are always dispatched via fast tail call + // mechanism and never via tail call helper for perf. + failTailCall(failReason); + return nullptr; + } + + if (call->IsImplicitTailCall() && !fgShouldFastTailCall(call, &failReason)) + { + // Implicit tailcall is not profitable + failTailCall(failReason); + return nullptr; + } CORINFO_TAILCALL_HELPERS tailCallHelpers; bool tailCallViaJitHelper = false; if (!canFastTailCall) { - if (call->IsImplicitTailCall()) - { - // Implicit or opportunistic tail calls are always dispatched via fast tail call - // mechanism and never via tail call helper for perf. 
- failTailCall(failReason); - return nullptr; - } - assert(call->IsTailPrefixedCall()); assert(call->tailCallInfo != nullptr); From 07782639a589cef6c0259d80f7f0cac04e3d8830 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Sep 2021 21:19:02 +0200 Subject: [PATCH 19/22] Disallow tailcalls via JIT helper in R2R builds --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1802f642bd4abe..dfda286f63f45e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -18215,7 +18215,7 @@ bool Compiler::fgCheckStmtAfterTailCall() // bool Compiler::fgCanTailCallViaJitHelper() { -#if !defined(TARGET_X86) || defined(UNIX_X86_ABI) +#if !defined(TARGET_X86) || defined(UNIX_X86_ABI) || defined(FEATURE_READYTORUN) // On anything except windows X86 we have no faster mechanism available. return false; #else From 16e32b0e56750c018119680b64c19b05de20ea24 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 24 Sep 2021 22:45:53 +0200 Subject: [PATCH 20/22] Revert "Take R2R indirection into account for tail call profitability" This reverts commit 863ad4d344bcdacebb00c8c921c7345fd63c7d7b. Let's not divert on the behavior here. It is not clear that having a smaller call site is better than tail calling albeit with a larger call site. --- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/morph.cpp | 125 +++++++++++++++++------------- 3 files changed, 72 insertions(+), 55 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d6c472a7e9ca36..6b5943685400b7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6231,7 +6231,6 @@ class Compiler private: GenTree* fgMorphField(GenTree* tree, MorphAddrContext* mac); bool fgCanFastTailCall(GenTreeCall* call, const char** failReason); - bool fgShouldFastTailCall(GenTreeCall* call, const char** failReason); #if FEATURE_FASTTAILCALL bool fgCallHasMustCopyByrefParameter(GenTreeCall* callee); #endif diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 0f8d3e5fd49f9e..c86c600acb6563 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -122,6 +122,7 @@ CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables s CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) CONFIG_INTEGER(JitQueryCurrentStaticFieldClass, W("JitQueryCurrentStaticFieldClass"), 1) +CONFIG_INTEGER(JitReportFastTailCallDecisions, W("JitReportFastTailCallDecisions"), 0) CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0) CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1) CONFIG_METHODSET(JitPrintInlinedMethods, W("JitPrintInlinedMethods")) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index dfda286f63f45e..b4fe84d2ed9a0c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -6782,7 +6782,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // // Arguments: // callee - The callee to check -// failReason - If this method returns false, the reason why. +// failReason - If this method returns false, the reason why. Can be nullptr. 
// // Return Value: // Returns true or false based on whether the callee can be fastTailCalled @@ -6922,15 +6922,65 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } calleeArgStackSize = GetOutgoingArgByteSize(calleeArgStackSize); + auto reportFastTailCallDecision = [&](const char* thisFailReason) { + if (failReason != nullptr) + { + *failReason = thisFailReason; + } + +#ifdef DEBUG + if ((JitConfig.JitReportFastTailCallDecisions()) == 1) + { + if (callee->gtCallType != CT_INDIRECT) + { + const char* methodName; + + methodName = eeGetMethodFullName(callee->gtCallMethHnd); + + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", + info.compFullName, methodName); + } + else + { + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " + "Decision: ", + info.compFullName); + } + + if (thisFailReason == nullptr) + { + printf("Will fast tailcall"); + } + else + { + printf("Will not fast tailcall (%s)", thisFailReason); + } + + printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize); + } + else + { + if (thisFailReason == nullptr) + { + JITDUMP("[Fast tailcall decision]: Will fast tailcall\n"); + } + else + { + JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason); + } + } +#endif // DEBUG + }; + if (!opts.compFastTailCalls) { - *failReason = "Configuration doesn't allow fast tail calls"; + reportFastTailCallDecision("Configuration doesn't allow fast tail calls"); return false; } if (callee->IsStressTailCall()) { - *failReason = "Fast tail calls are not performed under tail call stress"; + reportFastTailCallDecision("Fast tail calls are not performed under tail call stress"); return false; } @@ -6950,14 +7000,14 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) #if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) if (info.compIsVarArgs || callee->IsVarargs()) { - *failReason = "Fast tail calls with varargs not supported on Windows ARM/ARM64"; + reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64"); return false; } #endif // (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) if (compLocallocUsed) { - *failReason = "Localloc used"; + reportFastTailCallDecision("Localloc used"); return false; } @@ -6968,7 +7018,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // tail call. if (getNeedsGSSecurityCookie()) { - *failReason = "GS Security cookie check required"; + reportFastTailCallDecision("GS Security cookie check required"); return false; } #endif @@ -6976,7 +7026,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // If the NextCallReturnAddress intrinsic is used we should do normal calls. if (info.compHasNextCallRetAddr) { - *failReason = "Uses NextCallReturnAddress intrinsic"; + reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic"); return false; } @@ -6986,7 +7036,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // Otherwise go the slow route. 
if (info.compRetBuffArg == BAD_VAR_NUM) { - *failReason = "Callee has RetBuf but caller does not."; + reportFastTailCallDecision("Callee has RetBuf but caller does not."); return false; } } @@ -6998,7 +7048,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // as non-interruptible for fast tail calls. if (calleeArgStackSize > callerArgStackSize) { - *failReason = "Not enough incoming arg space"; + reportFastTailCallDecision("Not enough incoming arg space"); return false; } @@ -7007,46 +7057,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // as we need to keep our frame around. if (fgCallHasMustCopyByrefParameter(callee)) { - *failReason = "Callee has a byref parameter"; + reportFastTailCallDecision("Callee has a byref parameter"); return false; } - *failReason = nullptr; + reportFastTailCallDecision(nullptr); return true; #else // FEATURE_FASTTAILCALL - *failReason = "Fast tailcalls are not supported on this platform"; + if (failReason) + *failReason = "Fast tailcalls are not supported on this platform"; return false; #endif } -//------------------------------------------------------------------------ -// fgShouldFastTailCall: Do profitability checks for whether to do a fast tailcall. -// -// Arguments: -// call - The call to check -// failReason - If this method returns false, the reason why. Can be nullptr. -// -// Return Value: -// Returns true or false based on whether the callee can be fast tailcalled. -// -bool Compiler::fgShouldFastTailCall(GenTreeCall* call, const char** failReason) -{ -#ifdef TARGET_AMD64 - // Indirected R2R calls on x64 need indirection cell that is normally - // retrieved by disassembling caller instructions. If we tailcall, then we - // need to pass the indirection cell in a register which makes the call - // site larger. - if (call->IsR2RRelativeIndir()) - { - *failReason = "Tailcall with R2R indirection cell requires extra argument that increases call site size"; - return false; - } -#endif - - *failReason = nullptr; - return true; -} - //------------------------------------------------------------------------ // fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that // requires a struct copy in the caller. @@ -7508,25 +7531,19 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) const char* failReason = nullptr; bool canFastTailCall = fgCanFastTailCall(call, &failReason); - if (!canFastTailCall && call->IsImplicitTailCall()) - { - // Implicit or opportunistic tail calls are always dispatched via fast tail call - // mechanism and never via tail call helper for perf. - failTailCall(failReason); - return nullptr; - } - - if (call->IsImplicitTailCall() && !fgShouldFastTailCall(call, &failReason)) - { - // Implicit tailcall is not profitable - failTailCall(failReason); - return nullptr; - } CORINFO_TAILCALL_HELPERS tailCallHelpers; bool tailCallViaJitHelper = false; if (!canFastTailCall) { + if (call->IsImplicitTailCall()) + { + // Implicit or opportunistic tail calls are always dispatched via fast tail call + // mechanism and never via tail call helper for perf. 
+ failTailCall(failReason); + return nullptr; + } + assert(call->IsTailPrefixedCall()); assert(call->tailCallInfo != nullptr); From d5729a73f09902479ce4d77ef0c08c327b36685c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 25 Sep 2021 13:52:19 +0200 Subject: [PATCH 21/22] Add SPMI support, clean up mcPackets enum --- .../superpmi/superpmi-shared/lwmlist.h | 1 + .../superpmi-shared/methodcontext.cpp | 35 ++ .../superpmi/superpmi-shared/methodcontext.h | 386 +++++++++--------- .../superpmi-shared/spmirecordhelper.h | 8 +- .../superpmi-shim-collector/icorjitinfo.cpp | 3 + .../ToolBox/superpmi/superpmi/icorjitinfo.cpp | 1 + 6 files changed, 237 insertions(+), 197 deletions(-) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h index 4bffe52ecb4f63..056904894d1f01 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h @@ -128,6 +128,7 @@ LWM(GetSharedCCtorHelper, DWORDLONG, DWORD) LWM(GetStringConfigValue, DWORD, DWORD) LWM(GetSystemVAmd64PassStructInRegisterDescriptor, DWORDLONG, Agnostic_GetSystemVAmd64PassStructInRegisterDescriptor) LWM(GetTailCallHelpers, Agnostic_GetTailCallHelpers, Agnostic_CORINFO_TAILCALL_HELPERS) +LWM(UpdateEntryPointForTailCall, Agnostic_CORINFO_CONST_LOOKUP, Agnostic_CORINFO_CONST_LOOKUP) LWM(GetThreadTLSIndex, DWORD, DLD) LWM(GetTokenTypeAsHandle, GetTokenTypeAsHandleValue, DWORDLONG) LWM(GetTypeForBox, DWORDLONG, DWORDLONG) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index 7d4a191c51cf62..97e3b852dda89c 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -6599,6 +6599,41 @@ bool MethodContext::repGetTailCallHelpers( return true; } +void MethodContext::recUpdateEntryPointForTailCall( + const CORINFO_CONST_LOOKUP& origEntryPoint, + const CORINFO_CONST_LOOKUP& newEntryPoint) +{ + if (UpdateEntryPointForTailCall == nullptr) + UpdateEntryPointForTailCall = new LightWeightMap(); + + Agnostic_CORINFO_CONST_LOOKUP key = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&origEntryPoint); + Agnostic_CORINFO_CONST_LOOKUP value = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(&newEntryPoint); + UpdateEntryPointForTailCall->Add(key, value); + DEBUG_REC(dmpUpdateEntryPointForTailCall(key, value)); +} + +void MethodContext::dmpUpdateEntryPointForTailCall( + const Agnostic_CORINFO_CONST_LOOKUP& origEntryPoint, + const Agnostic_CORINFO_CONST_LOOKUP& newEntryPoint) +{ + printf("UpdateEntryPointForTailcall orig=%s new=%s", + SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(origEntryPoint).c_str(), + SpmiDumpHelper::DumpAgnostic_CORINFO_CONST_LOOKUP(newEntryPoint).c_str()); +} + +void MethodContext::repUpdateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) +{ + AssertMapExistsNoMessage(UpdateEntryPointForTailCall); + + Agnostic_CORINFO_CONST_LOOKUP key = SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP(entryPoint); + AssertKeyExistsNoMessage(UpdateEntryPointForTailCall, key); + + Agnostic_CORINFO_CONST_LOOKUP value = UpdateEntryPointForTailCall->Get(key); + DEBUG_REP(dmpUpdateEntryPointForTailCall(key, value)); + + *entryPoint = SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP(value); +} + void MethodContext::recGetMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod, mdMethodDef result) { if (GetMethodDefFromMethod == 
nullptr) diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h index 4abbef3faf135d..4b659daa2f736b 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -806,6 +806,10 @@ class MethodContext CORINFO_GET_TAILCALL_HELPERS_FLAGS flags, CORINFO_TAILCALL_HELPERS* pResult); + void recUpdateEntryPointForTailCall(const CORINFO_CONST_LOOKUP& origEntryPoint, const CORINFO_CONST_LOOKUP& newEntryPoint); + void dmpUpdateEntryPointForTailCall(const Agnostic_CORINFO_CONST_LOOKUP& origEntryPoint, const Agnostic_CORINFO_CONST_LOOKUP& newEntryPoint); + void repUpdateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint); + void recGetMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod, mdMethodDef result); void dmpGetMethodDefFromMethod(DWORDLONG key, DWORD value); mdMethodDef repGetMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod); @@ -904,203 +908,199 @@ class MethodContext } }; -// ********************* Please keep this up-to-date to ease adding more *************** -// Highest packet number: 192 -// ************************************************************************************* enum mcPackets { - Packet_AllocMethodBlockCounts = 131, // retired 1/4/2021 - Packet_AppendClassName = 149, // Added 8/6/2014 - needed for SIMD - Packet_AreTypesEquivalent = 1, - Packet_AsCorInfoType = 2, - Packet_CanAccessClass = 3, - Packet_CanAccessFamily = 4, - Packet_CanCast = 5, - Retired8 = 6, - Packet_GetLazyStringLiteralHelper = 147, // Added 12/20/2013 - as a replacement for CanEmbedModuleHandleForHelper + Packet_AreTypesEquivalent = 1, + Packet_AsCorInfoType = 2, + Packet_CanAccessClass = 3, + Packet_CanAccessFamily = 4, + Packet_CanCast = 5, Packet_CanGetCookieForPInvokeCalliSig = 7, - Packet_CanGetVarArgsHandle = 8, - Packet_CanInline = 9, - Packet_CanInlineTypeCheck = 173, // Added 11/15/2018 as a replacement for CanInlineTypeCheckWithObjectVTable - Packet_CanInlineTypeCheckWithObjectVTable = 10, - Packet_CanSkipMethodVerification = 11, // Retired 2/18/2020 - Packet_CanTailCall = 12, - Retired4 = 13, - Packet_CheckMethodModifier = 142, // retired as 13 on 2013/07/04 - Retired3 = 14, - Retired5 = 141, // retired as 14 on 2013/07/03 - Packet_CompareTypesForCast = 163, // Added 10/4/17 - Packet_CompareTypesForEquality = 164, // Added 10/4/17 - Packet_CompileMethod = 143, // retired as 141 on 2013/07/09 - Packet_ConstructStringLiteral = 15, - Packet_ConvertPInvokeCalliToCall = 169, // Added 4/29/18 - Packet_EmbedClassHandle = 16, - Packet_EmbedFieldHandle = 17, - Packet_EmbedGenericHandle = 18, - Packet_EmbedMethodHandle = 19, - Packet_EmbedModuleHandle = 20, - Packet_EmptyStringLiteral = 21, - Retired9 = 136, - Packet_ErrorList = 22, - Packet_FilterException = 134, - Packet_FindCallSiteSig = 23, - Retired7 = 24, - Packet_FindNameOfToken = 145, // Added 7/19/2013 - adjusted members to proper types - Packet_GetSystemVAmd64PassStructInRegisterDescriptor = 156, // Added 2/17/2016 - Packet_FindSig = 25, - Packet_GetAddressOfPInvokeFixup = 26, // Retired 2/18/2020 - Packet_GetAddressOfPInvokeTarget = 153, // Added 2/3/2016 - Packet_GetAddrOfCaptureThreadGlobal = 27, - Retired1 = 28, - Packet_GetArgClass = 139, // retired as 28 on 2013/07/03 - Packet_GetHFAType = 159, - Packet_GetArgNext = 29, - Retired2 = 30, - Packet_GetArgType = 140, // retired as 30 on 2013/07/03 - Packet_GetArrayInitializationData = 31, - Packet_GetArrayRank = 32, - 
Packet_GetMethodBlockCounts = 33, - Packet_GetBoundaries = 34, - Packet_GetBoxHelper = 35, - Packet_GetBuiltinClass = 36, - Packet_GetCallInfo = 37, - Packet_GetCastingHelper = 38, - Packet_GetChildType = 39, - Packet_GetClassAlignmentRequirement = 40, - Packet_GetClassAttribs = 41, - Packet_GetClassDomainID = 42, - Packet_GetClassGClayout = 43, - Packet_GetClassModuleIdForStatics = 44, - Packet_GetClassName = 45, - Packet_GetClassNameFromMetadata = 166, // Added 12/4/17 - Packet_GetTypeInstantiationArgument = 167, // Added 12/4/17 - Packet_GetClassNumInstanceFields = 46, - Packet_GetClassSize = 47, - Packet_GetHeapClassSize = 170, // Added 10/5/2018 - Packet_CanAllocateOnStack = 171, // Added 10/5/2018 - Packet_GetIntConfigValue = 151, // Added 2/12/2015 - Packet_GetStringConfigValue = 152, // Added 2/12/2015 - Packet_GetCookieForPInvokeCalliSig = 48, - Packet_GetDefaultComparerClass = 188, // Added 2/10/2021 - Packet_GetDefaultEqualityComparerClass = 162, // Added 9/24/2017 - Packet_GetDelegateCtor = 49, - Packet_GetEEInfo = 50, - Packet_GetEHinfo = 51, - Packet_GetFieldAddress = 52, - Packet_GetStaticFieldCurrentClass = 172, // Added 11/7/2018 - Packet_GetFieldClass = 53, - Packet_GetFieldInClass = 54, - Packet_GetFieldInfo = 55, - Packet_GetFieldName = 56, - Packet_GetFieldOffset = 57, - Packet_GetFieldThreadLocalStoreID = 58, - Packet_GetFieldType = 59, - Packet_GetFunctionEntryPoint = 60, - Packet_GetFunctionFixedEntryPoint = 61, - Packet_GetGSCookie = 62, - Packet_GetHelperFtn = 63, - Packet_GetHelperName = 64, - Packet_GetInlinedCallFrameVptr = 65, - Packet_GetIntrinsicID = 66, - Packet_GetJitFlags = 154, // Added 2/3/2016 - Packet_GetJitTimeLogFilename = 67, - Packet_GetJustMyCodeHandle = 68, - Retired10 = 182, // Added 9/27/2020 // was Packet_GetLikelyClass - Packet_GetLocationOfThisType = 69, - Packet_IsJitIntrinsic = 192, - Packet_GetMethodAttribs = 70, - Packet_GetMethodClass = 71, - Packet_GetMethodModule = 181, // Added 11/20/2020 - Packet_GetMethodDefFromMethod = 72, - Packet_GetMethodHash = 73, - Packet_GetMethodInfo = 74, - Packet_GetMethodName = 75, - Packet_GetMethodNameFromMetadata = 161, // Added 9/6/17 - Packet_GetMethodSig = 76, - Packet_GetMethodSync = 77, - Packet_GetMethodVTableOffset = 78, - Packet_GetNewArrHelper = 79, - Packet_GetNewHelper = 80, - Packet_GetOSRInfo = 177, // Added 3/5/2020 - Packet_GetParentType = 81, - Packet_GetPInvokeUnmanagedTarget = 82, // Retired 2/18/2020 - Packet_GetProfilingHandle = 83, - Packet_GetRelocTypeHint = 84, - Packet_GetExpectedTargetArchitecture = 183, // Added 12/18/2020 - Packet_GetSecurityPrologHelper = 85, // Retired 2/18/2020 - Packet_GetSharedCCtorHelper = 86, - Packet_GetTailCallCopyArgsThunk = 87, // Retired 4/27/2020 - Packet_GetTailCallHelpers = 178, // Added 3/18/2020 - Packet_GetThreadTLSIndex = 88, - Packet_GetTokenTypeAsHandle = 89, - Packet_GetTypeForBox = 90, - Packet_GetTypeForPrimitiveValueClass = 91, - Packet_GetTypeForPrimitiveNumericClass = 168, // Added 12/7/2017 - Packet_GetUnboxedEntry = 165, // Added 10/26/17 - Packet_GetUnBoxHelper = 92, - Packet_GetReadyToRunHelper = 150, // Added 10/10/2014 - Packet_GetReadyToRunDelegateCtorHelper = 157, // Added 3/30/2016 - Packet_GetUnmanagedCallConv = 94, - Packet_GetVarArgsHandle = 95, - Packet_GetVars = 96, - Packet_HandleException = 135, // Retired 7/19/2021 - Packet_InitClass = 97, - Packet_InitConstraintsForVerification = 98, // Retired 2/18/2020 - Packet_IsCompatibleDelegate = 99, - Packet_IsDelegateCreationAllowed = 155, - Packet_IsFieldStatic = 137, // 
Added 4/9/2013 - needed for 4.5.1 - Packet_IsIntrinsicType = 148, // Added 10/26/2019 - SIMD support - Packet_IsInstantiationOfVerifiedGeneric = 100, // Retired 2/18/2020 - Packet_IsSDArray = 101, - Packet_IsStructRequiringStackAllocRetBuf = 102, - Packet_IsValidStringRef = 103, - Packet_GetStringLiteral = 175, // Added 1/7/2020 - Retired6 = 104, - Packet_IsValidToken = 144, // Added 7/19/2013 - adjusted members to proper types - Packet_IsValueClass = 105, - Packet_IsWriteBarrierHelperRequired = 106, // Retired 2/18/2020 - Packet_MergeClasses = 107, - Packet_IsMoreSpecificType = 174, // Added 2/14/2019 - Packet_PInvokeMarshalingRequired = 108, - Packet_ResolveToken = 109, - Packet_ResolveVirtualMethod = 160, // Added 2/13/17 - Packet_TryResolveToken = 158, // Added 4/26/2016 - Packet_SatisfiesClassConstraints = 110, - Packet_SatisfiesMethodConstraints = 111, - Packet_DoesFieldBelongToClass = 112, // Added 8/12/2021 - Packet_SigInstHandleMap = 184, - Packet_AllocPgoInstrumentationBySchema = 186, // Added 1/4/2021 - Packet_GetPgoInstrumentationResults = 187, // Added 1/4/2021 - Packet_GetClassModule = 189, // Added 2/19/2021 - Packet_GetModuleAssembly = 190, // Added 2/19/2021 - Packet_GetAssemblyName = 191, // Added 2/19/2021 - - PacketCR_AddressMap = 113, - PacketCR_AllocGCInfo = 114, - PacketCR_AllocMem = 115, - PacketCR_AllocUnwindInfo = 132, - PacketCR_AssertLog = 138, // Added 6/10/2013 - added to nicely support ilgen - PacketCR_CallLog = 116, - PacketCR_ClassMustBeLoadedBeforeCodeIsRun = 117, - PacketCR_CompileMethod = 118, - PacketCR_MessageLog = 119, + Packet_CanGetVarArgsHandle = 8, + Packet_CanInline = 9, + //Packet_CanInlineTypeCheckWithObjectVTable = 10, + //Packet_CanSkipMethodVerification = 11, + Packet_CanTailCall = 12, + //Retired4 = 13, + //Retired3 = 14, + Packet_ConstructStringLiteral = 15, + Packet_EmbedClassHandle = 16, + Packet_EmbedFieldHandle = 17, + Packet_EmbedGenericHandle = 18, + Packet_EmbedMethodHandle = 19, + Packet_EmbedModuleHandle = 20, + Packet_EmptyStringLiteral = 21, + Packet_ErrorList = 22, + Packet_FindCallSiteSig = 23, + //Retired7 = 24, + Packet_FindSig = 25, + Packet_GetAddressOfPInvokeFixup = 26, + Packet_GetAddrOfCaptureThreadGlobal = 27, + //Retired1 = 28, + Packet_GetArgNext = 29, + //Retired2 = 30, + Packet_GetArrayInitializationData = 31, + Packet_GetArrayRank = 32, + //Packet_GetMethodBlockCounts = 33, + Packet_GetBoundaries = 34, + Packet_GetBoxHelper = 35, + Packet_GetBuiltinClass = 36, + Packet_GetCallInfo = 37, + Packet_GetCastingHelper = 38, + Packet_GetChildType = 39, + Packet_GetClassAlignmentRequirement = 40, + Packet_GetClassAttribs = 41, + Packet_GetClassDomainID = 42, + Packet_GetClassGClayout = 43, + Packet_GetClassModuleIdForStatics = 44, + Packet_GetClassName = 45, + Packet_GetClassNumInstanceFields = 46, + Packet_GetClassSize = 47, + Packet_GetCookieForPInvokeCalliSig = 48, + Packet_GetDelegateCtor = 49, + Packet_GetEEInfo = 50, + Packet_GetEHinfo = 51, + Packet_GetFieldAddress = 52, + Packet_GetFieldClass = 53, + Packet_GetFieldInClass = 54, + Packet_GetFieldInfo = 55, + Packet_GetFieldName = 56, + Packet_GetFieldOffset = 57, + Packet_GetFieldThreadLocalStoreID = 58, + Packet_GetFieldType = 59, + Packet_GetFunctionEntryPoint = 60, + Packet_GetFunctionFixedEntryPoint = 61, + Packet_GetGSCookie = 62, + Packet_GetHelperFtn = 63, + Packet_GetHelperName = 64, + Packet_GetInlinedCallFrameVptr = 65, + Packet_GetIntrinsicID = 66, + Packet_GetJitTimeLogFilename = 67, + Packet_GetJustMyCodeHandle = 68, + Packet_GetLocationOfThisType = 
69, + Packet_GetMethodAttribs = 70, + Packet_GetMethodClass = 71, + Packet_GetMethodDefFromMethod = 72, + Packet_GetMethodHash = 73, + Packet_GetMethodInfo = 74, + Packet_GetMethodName = 75, + Packet_GetMethodSig = 76, + Packet_GetMethodSync = 77, + Packet_GetMethodVTableOffset = 78, + Packet_GetNewArrHelper = 79, + Packet_GetNewHelper = 80, + Packet_GetParentType = 81, + //Packet_GetPInvokeUnmanagedTarget = 82, + Packet_GetProfilingHandle = 83, + Packet_GetRelocTypeHint = 84, + //Packet_GetSecurityPrologHelper = 85, + Packet_GetSharedCCtorHelper = 86, + //Packet_GetTailCallCopyArgsThunk = 87, + Packet_GetThreadTLSIndex = 88, + Packet_GetTokenTypeAsHandle = 89, + Packet_GetTypeForBox = 90, + Packet_GetTypeForPrimitiveValueClass = 91, + Packet_GetUnBoxHelper = 92, + Packet_GetUnmanagedCallConv = 94, + Packet_GetVarArgsHandle = 95, + Packet_GetVars = 96, + Packet_InitClass = 97, + //Packet_InitConstraintsForVerification = 98, + Packet_IsCompatibleDelegate = 99, + //Packet_IsInstantiationOfVerifiedGeneric = 100, + Packet_IsSDArray = 101, + Packet_IsStructRequiringStackAllocRetBuf = 102, + Packet_IsValidStringRef = 103, + //Retired6 = 104, + Packet_IsValueClass = 105, + //Packet_IsWriteBarrierHelperRequired = 106, + Packet_MergeClasses = 107, + Packet_PInvokeMarshalingRequired = 108, + Packet_ResolveToken = 109, + Packet_SatisfiesClassConstraints = 110, + Packet_SatisfiesMethodConstraints = 111, + Packet_DoesFieldBelongToClass = 112, + PacketCR_AddressMap = 113, + PacketCR_AllocGCInfo = 114, + PacketCR_AllocMem = 115, + PacketCR_CallLog = 116, + PacketCR_ClassMustBeLoadedBeforeCodeIsRun = 117, + PacketCR_CompileMethod = 118, + PacketCR_MessageLog = 119, PacketCR_MethodMustBeLoadedBeforeCodeIsRun = 120, - PacketCR_ProcessName = 121, - PacketCR_RecordRelocation = 122, - PacketCR_ReportFatalError = 123, - PacketCR_ReportInliningDecision = 124, - PacketCR_ReportTailCallDecision = 125, - PacketCR_ReserveUnwindInfo = 133, - PacketCR_SetBoundaries = 126, - PacketCR_SetEHcount = 127, - PacketCR_SetEHinfo = 128, - PacketCR_SetMethodAttribs = 129, - PacketCR_SetVars = 130, - PacketCR_SetPatchpointInfo = 176, // added 8/5/2019 - PacketCR_RecordCallSite = 146, // Retired 9/13/2020 - PacketCR_RecordCallSiteWithSignature = 179, // Added 9/13/2020 - PacketCR_RecordCallSiteWithoutSignature = 180, // Added 9/13/2020 - PacketCR_CrSigInstHandleMap = 185, + PacketCR_ProcessName = 121, + PacketCR_RecordRelocation = 122, + PacketCR_ReportFatalError = 123, + PacketCR_ReportInliningDecision = 124, + PacketCR_ReportTailCallDecision = 125, + PacketCR_SetBoundaries = 126, + PacketCR_SetEHcount = 127, + PacketCR_SetEHinfo = 128, + PacketCR_SetMethodAttribs = 129, + PacketCR_SetVars = 130, + //Packet_AllocMethodBlockCounts = 131, + PacketCR_AllocUnwindInfo = 132, + PacketCR_ReserveUnwindInfo = 133, + Packet_FilterException = 134, + //Packet_HandleException = 135, + //Retired9 = 136, + Packet_IsFieldStatic = 137, + PacketCR_AssertLog = 138, + Packet_GetArgClass = 139, + Packet_GetArgType = 140, + //Retired5 = 141, + Packet_CheckMethodModifier = 142, + Packet_CompileMethod = 143, + Packet_IsValidToken = 144, + Packet_FindNameOfToken = 145, + //PacketCR_RecordCallSite = 146, + Packet_GetLazyStringLiteralHelper = 147, + Packet_IsIntrinsicType = 148, + Packet_AppendClassName = 149, + Packet_GetReadyToRunHelper = 150, + Packet_GetIntConfigValue = 151, + Packet_GetStringConfigValue = 152, + Packet_GetAddressOfPInvokeTarget = 153, + Packet_GetJitFlags = 154, + Packet_IsDelegateCreationAllowed = 155, + 
Packet_GetSystemVAmd64PassStructInRegisterDescriptor = 156, + Packet_GetReadyToRunDelegateCtorHelper = 157, + Packet_TryResolveToken = 158, + Packet_GetHFAType = 159, + Packet_ResolveVirtualMethod = 160, + Packet_GetMethodNameFromMetadata = 161, + Packet_GetDefaultEqualityComparerClass = 162, + Packet_CompareTypesForCast = 163, + Packet_CompareTypesForEquality = 164, + Packet_GetUnboxedEntry = 165, + Packet_GetClassNameFromMetadata = 166, + Packet_GetTypeInstantiationArgument = 167, + Packet_GetTypeForPrimitiveNumericClass = 168, + Packet_ConvertPInvokeCalliToCall = 169, + Packet_GetHeapClassSize = 170, + Packet_CanAllocateOnStack = 171, + Packet_GetStaticFieldCurrentClass = 172, + Packet_CanInlineTypeCheck = 173, + Packet_IsMoreSpecificType = 174, + Packet_GetStringLiteral = 175, + PacketCR_SetPatchpointInfo = 176, + Packet_GetOSRInfo = 177, + Packet_GetTailCallHelpers = 178, + PacketCR_RecordCallSiteWithSignature = 179, + PacketCR_RecordCallSiteWithoutSignature = 180, + Packet_GetMethodModule = 181, + //Retired10 = 182, + Packet_GetExpectedTargetArchitecture = 183, + Packet_SigInstHandleMap = 184, + PacketCR_CrSigInstHandleMap = 185, + Packet_AllocPgoInstrumentationBySchema = 186, + Packet_GetPgoInstrumentationResults = 187, + Packet_GetDefaultComparerClass = 188, + Packet_GetClassModule = 189, + Packet_GetModuleAssembly = 190, + Packet_GetAssemblyName = 191, + Packet_IsJitIntrinsic = 192, + Packet_UpdateEntryPointForTailCall = 193, }; void SetDebugDumpVariables(); diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shared/spmirecordhelper.h b/src/coreclr/ToolBox/superpmi/superpmi-shared/spmirecordhelper.h index d5316b94db17bb..17476cb9ba69ab 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shared/spmirecordhelper.h +++ b/src/coreclr/ToolBox/superpmi/superpmi-shared/spmirecordhelper.h @@ -97,9 +97,9 @@ class SpmiRecordsHelper static CORINFO_LOOKUP_KIND RestoreCORINFO_LOOKUP_KIND(Agnostic_CORINFO_LOOKUP_KIND& lookupKind); static Agnostic_CORINFO_CONST_LOOKUP StoreAgnostic_CORINFO_CONST_LOOKUP( - CORINFO_CONST_LOOKUP* pLookup); + const CORINFO_CONST_LOOKUP* pLookup); - static CORINFO_CONST_LOOKUP RestoreCORINFO_CONST_LOOKUP(Agnostic_CORINFO_CONST_LOOKUP& lookup); + static CORINFO_CONST_LOOKUP RestoreCORINFO_CONST_LOOKUP(const Agnostic_CORINFO_CONST_LOOKUP& lookup); static Agnostic_CORINFO_RUNTIME_LOOKUP StoreAgnostic_CORINFO_RUNTIME_LOOKUP( CORINFO_RUNTIME_LOOKUP* pLookup); @@ -459,7 +459,7 @@ inline CORINFO_LOOKUP_KIND SpmiRecordsHelper::RestoreCORINFO_LOOKUP_KIND( } inline Agnostic_CORINFO_CONST_LOOKUP SpmiRecordsHelper::StoreAgnostic_CORINFO_CONST_LOOKUP( - CORINFO_CONST_LOOKUP* pLookup) + const CORINFO_CONST_LOOKUP* pLookup) { Agnostic_CORINFO_CONST_LOOKUP constLookup; ZeroMemory(&constLookup, sizeof(constLookup)); @@ -469,7 +469,7 @@ inline Agnostic_CORINFO_CONST_LOOKUP SpmiRecordsHelper::StoreAgnostic_CORINFO_CO } inline CORINFO_CONST_LOOKUP SpmiRecordsHelper::RestoreCORINFO_CONST_LOOKUP( - Agnostic_CORINFO_CONST_LOOKUP& lookup) + const Agnostic_CORINFO_CONST_LOOKUP& lookup) { CORINFO_CONST_LOOKUP constLookup; constLookup.accessType = (InfoAccessType)lookup.accessType; diff --git a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp index 0bbdc12379c97c..0d0acb208b4164 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -1779,6 +1779,9 @@ bool interceptor_ICJI::getTailCallHelpers( void 
interceptor_ICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) { mc->cr->AddCall("updateEntryPointForTailCall"); + CORINFO_CONST_LOOKUP origEntryPoint = *entryPoint; + original_ICorJitInfo->updateEntryPointForTailCall(entryPoint); + mc->recUpdateEntryPointForTailCall(origEntryPoint, *entryPoint); } // Stuff directly on ICorJitInfo diff --git a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp index eb3629c43c0a1c..29ed9e03df3638 100644 --- a/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/ToolBox/superpmi/superpmi/icorjitinfo.cpp @@ -1559,6 +1559,7 @@ bool MyICJI::notifyInstructionSetUsage(CORINFO_InstructionSet instructionSet, bo void MyICJI::updateEntryPointForTailCall(CORINFO_CONST_LOOKUP* entryPoint) { jitInstance->mc->cr->AddCall("updateEntryPointForTailCall"); + jitInstance->mc->repUpdateEntryPointForTailCall(entryPoint); } // Stuff directly on ICorJitInfo From ad78caf68295ab5ef090a1e4b7624f1620ab5462 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 5 Oct 2021 13:21:02 +0200 Subject: [PATCH 22/22] Take necessary conditions into account in canTailCall Do not do implicit tailcalls when * The caller is the entry point * The caller is marked NoInline * The callee requires security object --- .../JitInterface/CorInfoImpl.ReadyToRun.cs | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 63f58d66b8cea1..542565a5128c94 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -923,6 +923,35 @@ private void getFunctionEntryPoint(CORINFO_METHOD_STRUCT_* ftn, ref CORINFO_CONS private bool canTailCall(CORINFO_METHOD_STRUCT_* callerHnd, CORINFO_METHOD_STRUCT_* declaredCalleeHnd, CORINFO_METHOD_STRUCT_* exactCalleeHnd, bool fIsTailPrefix) { + if (!fIsTailPrefix) + { + MethodDesc caller = HandleToObject(callerHnd); + + // Do not tailcall out of the entry point as it results in a confusing debugger experience. + if (caller is EcmaMethod em && em.Module.EntryPoint == caller) + { + return false; + } + + // Do not tailcall from methods that are marked as noinline (people often use no-inline + // to mean "I want to always see this method in stacktrace") + if (caller.IsNoInlining) + { + return false; + } + + // Methods with StackCrawlMark depend on finding their caller on the stack. + // If we tail call one of these guys, they get confused. For lack of + // a better way of identifying them, we use DynamicSecurity attribute to identify + // them. + // + MethodDesc callee = exactCalleeHnd == null ? null : HandleToObject(exactCalleeHnd); + if (callee != null && callee.RequireSecObject) + { + return false; + } + } + return true; }
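
As an illustration of the canTailCall change above, here is a minimal sketch of
user code (a hypothetical example, not part of this patch; the type and method
names are made up). Assuming the other fast tailcall conditions are met, the
call out of Caller is no longer turned into an implicit tail call because of
the NoInlining attribute, and calls out of Main are likewise left as regular
calls because Main is the module entry point:

    using System.Runtime.CompilerServices;

    class TailCallExample
    {
        // NoInlining is often used to keep a method visible in stack traces,
        // so crossgen2 now refuses implicit tail calls out of it.
        [MethodImpl(MethodImplOptions.NoInlining)]
        static int Caller(int x) => Helper(x + 1);

        static int Helper(int x) => x * 2;

        // Calls out of the entry point are also kept as regular calls to avoid
        // a confusing debugger experience.
        static int Main() => Caller(20);
    }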