diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index d1988ba34a5394..b512e92a6e3f28 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -909,97 +909,6 @@ void GcInfoEncoder::FinalizeSlotIds() #endif } -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - -// tells whether a slot cannot contain an object reference -// at call instruction or right after returning -bool GcInfoEncoder::DoNotTrackInPartiallyInterruptible(GcSlotDesc &slotDesc) -{ -#if defined(TARGET_ARM) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 14); - _ASSERTE(regNum != 13); // sp - - return ((regNum <= 3) || (regNum >= 12)) // R12 is volatile and SP/LR can't contain objects around calls - && regNum != 0 // R0 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_ARM64) - - _ASSERTE(m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1); - if (slotDesc.IsRegister()) - { - int regNum = (int)slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 30); - _ASSERTE(regNum != 18); - - return (regNum <= 17 || regNum >= 29) // X0 through X17 are scratch, FP/LR can't be used for objects around calls - && regNum != 0 // X0 can contain return value - && regNum != 1 // X1 can contain return value - ; - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#elif defined(TARGET_AMD64) - - _ASSERTE( m_SizeOfStackOutgoingAndScratchArea != (UINT32)-1 ); - if(slotDesc.IsRegister()) - { - int regNum = (int) slotDesc.Slot.RegisterNumber; - _ASSERTE(regNum >= 0 && regNum <= 16); - _ASSERTE(regNum != 4); // rsp - - UINT16 PreservedRegMask = - (1 << 3) // rbx - | (1 << 5) // rbp -#ifndef UNIX_AMD64_ABI - | (1 << 6) // rsi - | (1 << 7) // rdi -#endif // UNIX_AMD64_ABI - | (1 << 12) // r12 - | (1 << 13) // r13 - | (1 << 14) // r14 - | (1 << 15) // r15 - | (1 << 0) // rax - may contain return value -#ifdef UNIX_AMD64_ABI - | (1 << 2) // rdx - may contain return value -#endif - ; - - return !(PreservedRegMask & (1 << regNum)); - } - else if (!slotDesc.IsUntracked() && (slotDesc.Slot.Stack.Base == GC_SP_REL) && - ((UINT32)slotDesc.Slot.Stack.SpOffset < m_SizeOfStackOutgoingAndScratchArea)) - { - return TRUE; - } - else - return FALSE; - -#else - return FALSE; -#endif -} -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - void GcInfoEncoder::Build() { #ifdef _DEBUG @@ -1389,14 +1298,11 @@ void GcInfoEncoder::Build() else { UINT32 slotIndex = pCurrent->SlotId; - if(!DoNotTrackInPartiallyInterruptible(m_SlotTable[slotIndex])) - { - BYTE becomesLive = pCurrent->BecomesLive; - _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) - || (!liveState.ReadBit(slotIndex) && becomesLive)); + BYTE becomesLive = pCurrent->BecomesLive; + _ASSERTE((liveState.ReadBit(slotIndex) && !becomesLive) + || (!liveState.ReadBit(slotIndex) && becomesLive)); - liveState.WriteBit(slotIndex, becomesLive); - } + liveState.WriteBit(slotIndex, becomesLive); pCurrent++; } } diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 08970ec4102e9b..88a03a46315e41 100644 --- 
a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -574,6 +574,7 @@ enum CorInfoHelpFunc CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, // Transition to preemptive mode and track transitions in reverse P/Invoke prolog. CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle + CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, // Resolve a non-generic interface method from this pointer and dispatch cell CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index 8c5daf92c23b3f..3777e1b7064bb2 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -542,10 +542,6 @@ class GcInfoEncoder void SizeofSlotStateVarLengthVector(const BitArray& vector, UINT32 baseSkip, UINT32 baseRun, UINT32 * pSizeofSimple, UINT32 * pSizeofRLE, UINT32 * pSizeofRLENeg); UINT32 WriteSlotStateVarLengthVector(BitStreamWriter &writer, const BitArray& vector, UINT32 baseSkip, UINT32 baseRun); -#ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - bool DoNotTrackInPartiallyInterruptible(GcSlotDesc &slot); -#endif // PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED - // Assumes that "*ppTransitions" is has size "numTransitions", is sorted by CodeOffset then by SlotId, // and that "*ppEndTransitions" points one beyond the end of the array. If "*ppTransitions" contains // any dead/live transitions pairs for the same CodeOffset and SlotID, removes those, by allocating a diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index ca025f62a7ec4d..5feda781faee94 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 4463d6ac-dfcb-4ab0-a941-c53b56089b7c */ - 0x4463d6ac, - 0xdfcb, - 0x4ab0, - {0xa9, 0x41, 0xc5, 0x3b, 0x56, 0x08, 0x9b, 0x7c} +constexpr GUID JITEEVersionIdentifier = { /* 254e838d-821b-4600-9c4e-b5ea6ef20d38 */ + 0x254e838d, + 0x821b, + 0x4600, + {0x9c, 0x4e, 0xb5, 0xea, 0x6e, 0xf2, 0x0d, 0x38} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 38114a9bbfcada..c6268c0f6821ed 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -309,6 +309,11 @@ JITHELPER(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, JIT_ReversePInvokeExitTrackTransitions, METHOD__NIL) JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, METHOD__NIL) +#if defined(TARGET_AMD64) + JITHELPER(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, JIT_InterfaceLookupForSlot, METHOD__NIL) +#else + JITHELPER(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, NULL, METHOD__NIL) +#endif #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, METHOD__NIL) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7fb0d7982f93df..291e9639ee9547 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -735,6 +735,11 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) case CORINFO_HELP_VALIDATE_INDIRECT_CALL: return RBM_VALIDATE_INDIRECT_CALL_TRASH; +#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH + case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: + return RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; +#endif + default: return RBM_CALLEE_TRASH; } @@ -6069,6 +6074,7 @@ void CodeGen::genDefinePendingCallLabel(GenTreeCall* call) { case 
CORINFO_HELP_VALIDATE_INDIRECT_CALL: case CORINFO_HELP_VIRTUAL_FUNC_PTR: + case CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: case CORINFO_HELP_MEMSET: case CORINFO_HELP_MEMCPY: return; diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index bbf30da0c728fa..ede18f7f84c810 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -9983,7 +9983,6 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn unsigned argStkCnt; S_UINT16 argRecCnt(0); // arg count for ESP, ptr-arg count for EBP - unsigned gcrefRegs, byrefRegs; #ifdef JIT32_GCENCODER // For the general encoder, we always need to record calls, so we make this call @@ -10025,26 +10024,19 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn return; #endif - // Do we have any interesting (i.e., callee-saved) registers live here? + // Do we have any interesting registers live here? - gcrefRegs = byrefRegs = 0; + unsigned gcrefRegs = emitThisGCrefRegs.GetIntRegSet() >> REG_INT_FIRST; + unsigned byrefRegs = emitThisByrefRegs.GetIntRegSet() >> REG_INT_FIRST; - // We make a bitmask whose bits correspond to callee-saved register indices (in the sequence - // of callee-saved registers only). - for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) - { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; - if (emitThisGCrefRegs & calleeSavedRbm) - { - gcrefRegs |= (1 << calleeSavedRegIdx); - } - if (emitThisByrefRegs & calleeSavedRbm) - { - byrefRegs |= (1 << calleeSavedRegIdx); - } - } + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(gcrefRegs << REG_INT_FIRST)) == emitThisGCrefRegs); + assert(regMaskTP::FromIntRegSet(SingleTypeRegSet(byrefRegs << REG_INT_FIRST)) == emitThisByrefRegs); #ifdef JIT32_GCENCODER + // x86 does not report GC refs/byrefs in return registers at call sites + gcrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + byrefRegs &= ~(1u << (REG_INTRET - REG_INT_FIRST)); + // For the general encoder, we always have to record calls, so we don't take this early return. /* Are there any // args to pop at this call site? @@ -10353,13 +10345,13 @@ const char* emitter::emitOffsetToLabel(unsigned offs) regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) { // Is it a helper with a special saved set? - bool isNoGCHelper = emitNoGChelper(methHnd); + bool isNoGCHelper = emitNoGChelper(methHnd); + CorInfoHelpFunc helper = Compiler::eeGetHelperNum(methHnd); + if (isNoGCHelper) { - CorInfoHelpFunc helpFunc = Compiler::eeGetHelperNum(methHnd); - // Get the set of registers that this call kills and remove it from the saved set. - regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helpFunc); + regMaskTP savedSet = RBM_ALLINT & ~emitGetGCRegsKilledByNoGCCall(helper); #ifdef DEBUG if (emitComp->verbose) @@ -10372,6 +10364,13 @@ regMaskTP emitter::emitGetGCRegsSavedOrModified(CORINFO_METHOD_HANDLE methHnd) #endif return savedSet; } +#ifdef RBM_INTERFACELOOKUP_FOR_SLOT_TRASH + else if (helper == CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT) + { + // This one is not no-gc, but it preserves arg registers. + return RBM_ALLINT & ~RBM_INTERFACELOOKUP_FOR_SLOT_TRASH; + } +#endif else { // This is the saved set of registers after a normal call. 
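
The two emit.cpp hunks above change how live GC registers are recorded at call sites: instead of compressing them into a compact mask whose bit i means "the i-th register of raRbmCalleeSaveOrder", the emitter now stores the full integer register set shifted down by REG_INT_FIRST; for the JIT32 encoder, translation back into the legacy callee-saved order now happens at GC-info emission time (see the gcencode.cpp hunk below). A minimal sketch of the two encodings, using an illustrative x86-like register numbering and order table rather than the JIT's actual definitions:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical register numbering, for illustration only.
enum RegNum
{
    REG_INT_FIRST = 0,
    REG_EAX = 0,
    REG_EBX = 3,
    REG_EBP = 5,
    REG_ESI = 6,
    REG_EDI = 7,
};

// Old scheme: bit i of the compact mask corresponds to the i-th entry of a
// fixed callee-saved order table (mirroring raRbmCalleeSaveOrder).
static const RegNum calleeSaveOrder[] = {REG_EDI, REG_ESI, REG_EBX, REG_EBP};

static unsigned CompactFromRegSet(uint64_t regSet)
{
    unsigned compact = 0;
    for (unsigned i = 0; i < 4; i++)
    {
        if ((regSet & (1ull << calleeSaveOrder[i])) != 0)
        {
            compact |= 1u << i;
        }
    }
    return compact;
}

// New scheme: the whole integer register set, re-based at REG_INT_FIRST.
static uint64_t DirectFromRegSet(uint64_t regSet)
{
    return regSet >> REG_INT_FIRST;
}

int main()
{
    uint64_t live = (1ull << REG_ESI) | (1ull << REG_EBX); // GC refs in ESI and EBX

    // The direct encoding round-trips losslessly; this is what the new
    // asserts in emitStackPopLargeStk check against emitThisGCrefRegs.
    assert((DirectFromRegSet(live) << REG_INT_FIRST) == live);

    // The compact encoding covers only registers named in the order table:
    // EDI (bit 0) is clear, ESI (bit 1) and EBX (bit 2) are set.
    assert(CompactFromRegSet(live) == 0b0110u);
    return 0;
}
```

The direct form round-trips losslessly for every integer register, which is what the new asserts against emitThisGCrefRegs/emitThisByrefRegs verify; the compact form can only describe the registers named in its order table.
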
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index b32dc60292dbbf..688f61f60923ed 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3199,9 +3199,24 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un callArgCnt = genRegPtrTemp->rpdPtrArg; - unsigned gcrefRegMask = genRegPtrTemp->rpdCallGCrefRegs; + unsigned gcrefRegMask = 0; - byrefRegMask = genRegPtrTemp->rpdCallByrefRegs; + byrefRegMask = 0; + + // The order here is fixed: it must agree with the order assumed in eetwain. + // NB: x86 GC decoder does not report return registers at call sites. + static const regNumber calleeSaveOrder[] = {REG_EDI, REG_ESI, REG_EBX, REG_EBP}; + for (unsigned i = 0; i < ArrLen(calleeSaveOrder); i++) + { + if ((genRegPtrTemp->rpdCallGCrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + gcrefRegMask |= 1u << i; + } + if ((genRegPtrTemp->rpdCallByrefRegs & (1 << (calleeSaveOrder[i] - REG_INT_FIRST))) != 0) + { + byrefRegMask |= 1u << i; + } + } assert((gcrefRegMask & byrefRegMask) == 0); @@ -4465,8 +4480,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. - regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); + regMaskSmall gcrefRegMask = call->cdGCrefRegs; + regMaskSmall byrefRegMask = call->cdByrefRegs; assert((gcrefRegMask & byrefRegMask) == 0); @@ -4552,11 +4567,8 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. - regMaskSmall gcrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - - regMaskSmall byrefRegMask = - genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); + regMaskSmall gcrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallGCrefRegs << REG_INT_FIRST); + regMaskSmall byrefRegMask = regMaskSmall(genRegPtrTemp->rpdCallByrefRegs << REG_INT_FIRST); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index fbcc431a081e30..aa93ba58d87c12 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1870,6 +1870,47 @@ void CallArgs::Remove(CallArg* arg) assert(!"Did not find arg to remove in CallArgs::Remove"); } +//--------------------------------------------------------------- +// RemoveLate: Remove an argument from the argument list and late argument list. +// +// Parameters: +// arg - The arg to remove. +// +// Remarks: +// This can be used to remove arguments after ABI determination and after morph. +// It removes the argument from both the early and late list. However, no ABI +// information is updated. Caller needs to know what they are doing. 
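+// Unlike Remove, the well-known-arg bookkeeping (RemovedWellKnownArg) is only
+// updated when the argument is found in the early list, which contains every
+// argument; the late list is merely unlinked.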
+//
+void CallArgs::RemoveLate(CallArg* arg)
+{
+    CallArg** slot = &m_lateHead;
+    while (*slot != nullptr)
+    {
+        if (*slot == arg)
+        {
+            *slot = arg->GetLateNext();
+            break;
+        }
+
+        slot = &(*slot)->LateNextRef();
+    }
+
+    slot = &m_head;
+    while (*slot != nullptr)
+    {
+        if (*slot == arg)
+        {
+            *slot = arg->GetNext();
+            RemovedWellKnownArg(arg->GetWellKnownArg());
+            return;
+        }
+
+        slot = &(*slot)->NextRef();
+    }
+
+    assert(!"Did not find arg to remove in CallArgs::RemoveLate");
+}
+
 #ifdef TARGET_XARCH
 //---------------------------------------------------------------
 // NeedsVzeroupper: Determines if the call needs a vzeroupper emitted before it is invoked
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index e01d9705eb0d4a..72719ee2b40502 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -4777,6 +4777,7 @@ class CallArgs
     CallArg* InsertAfterThisOrFirst(Compiler* comp, const NewCallArg& arg);
     void PushLateBack(CallArg* arg);
     void Remove(CallArg* arg);
+    void RemoveLate(CallArg* arg);
 
     template <typename CopyNodeFunc>
     void InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc copyFunc);
diff --git a/src/coreclr/jit/jitgcinfo.h b/src/coreclr/jit/jitgcinfo.h
index 6c62a0816113a5..6d04e3b22734b3 100644
--- a/src/coreclr/jit/jitgcinfo.h
+++ b/src/coreclr/jit/jitgcinfo.h
@@ -162,7 +162,13 @@ class GCInfo
             regMaskSmall rpdDel; // regptr bitset being removed
         } rpdCompiler;
 
-        unsigned short rpdPtrArg; // arg offset or popped arg count
+        struct
+        {
+            // Registers after call containing GC/byref (index 0 = REG_INT_FIRST)
+            unsigned int   rpdCallGCrefRegs;
+            unsigned int   rpdCallByrefRegs;
+            unsigned short rpdPtrArg; // arg offset or popped arg count
+        };
     };
 
 #ifndef JIT32_GCENCODER
@@ -182,11 +188,8 @@ class GCInfo
         return (GCtype)rpdGCtype;
     }
 
-    unsigned short rpdIsThis : 1;                       // is it the 'this' pointer
-    unsigned short rpdCall : 1;                         // is this a true call site?
-    unsigned short : 1;                                 // Padding bit, so next two start on a byte boundary
-    unsigned short rpdCallGCrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing GC pointers.
-    unsigned short rpdCallByrefRegs : CNT_CALL_GC_REGS; // Callee-saved and return registers containing byrefs.
+    unsigned short rpdIsThis : 1; // is it the 'this' pointer
+    unsigned short rpdCall : 1;   // is this a true call site?
 
 #ifndef JIT32_GCENCODER
     bool rpdIsCallInstr()
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 71500b34a952a1..0203aee46c56ad 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -3363,8 +3363,99 @@ void Lowering::LowerCFGCall(GenTreeCall* call)
     {
         return;
     }
 
+    auto cloneUse = [=](LIR::Use& use, bool cloneConsts) -> GenTree* {
+        bool canClone = cloneConsts && use.Def()->IsCnsIntOrI();
+        if (!canClone && use.Def()->OperIs(GT_LCL_VAR))
+        {
+            canClone = !comp->lvaGetDesc(use.Def()->AsLclVarCommon())->IsAddressExposed();
+        }
+
+        if (canClone)
+        {
+            return comp->gtCloneExpr(use.Def());
+        }
+        else
+        {
+            unsigned newLcl = use.ReplaceWithLclVar(comp);
+            return comp->gtNewLclvNode(newLcl, TYP_I_IMPL);
+        }
+    };
     GenTree* callTarget = call->gtCallType == CT_INDIRECT ? call->gtCallAddr : call->gtControlExpr;
+
+    if (call->IsVirtualStub())
+    {
+        // VSDs go through a resolver instead which skips double validation and
+        // indirection.
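+        // The resolver helper takes the 'this' pointer and the dispatch cell and
+        // returns the resolved target, so the call becomes an ordinary indirect
+        // call whose target CFG validates once like any other.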
+ CallArg* vsdCellArg = call->gtArgs.FindWellKnownArg(WellKnownArg::VirtualStubCell); + CallArg* thisArg = call->gtArgs.GetThisArg(); + + assert((vsdCellArg != nullptr) && (thisArg != nullptr)); + assert(thisArg->GetNode()->OperIs(GT_PUTARG_REG)); + LIR::Use thisArgUse(BlockRange(), &thisArg->GetNode()->AsOp()->gtOp1, thisArg->GetNode()); + GenTree* thisArgClone = cloneUse(thisArgUse, true); + + // The VSD cell is not needed for the original call when going through the resolver. + // It can be removed without further fixups because it has fixed ABI assignment. + call->gtArgs.RemoveLate(vsdCellArg); + assert(vsdCellArg->GetNode()->OperIs(GT_PUTARG_REG)); + // Also PUTARG_REG can be removed. + BlockRange().Remove(vsdCellArg->GetNode()); + // The actual cell we need for the resolver. + GenTree* vsdCellArgNode = vsdCellArg->GetNode()->gtGetOp1(); + + GenTreeCall* resolve = comp->gtNewHelperCallNode(CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, TYP_I_IMPL); + + // Use a placeholder for the cell since the cell is already inserted in + // LIR. + GenTree* vsdCellPlaceholder = comp->gtNewZeroConNode(TYP_I_IMPL); + resolve->gtArgs.PushFront(comp, + NewCallArg::Primitive(vsdCellPlaceholder).WellKnown(WellKnownArg::VirtualStubCell)); + + // 'this' arg clone is not inserted, so no need to use a placeholder for that. + resolve->gtArgs.PushFront(comp, NewCallArg::Primitive(thisArgClone)); + + comp->fgMorphTree(resolve); + + LIR::Range resolveRange = LIR::SeqTree(comp, resolve); + GenTree* resolveFirst = resolveRange.FirstNode(); + GenTree* resolveLast = resolveRange.LastNode(); + // Resolution comes with a null check, so it must happen after all + // arguments are evaluated, hence we insert it right before the call. + BlockRange().InsertBefore(call, std::move(resolveRange)); + + // Swap out the VSD cell argument. + LIR::Use vsdCellUse; + bool gotUse = BlockRange().TryGetUse(vsdCellPlaceholder, &vsdCellUse); + assert(gotUse); + vsdCellUse.ReplaceWith(vsdCellArgNode); + vsdCellPlaceholder->SetUnusedValue(); + + // Now we can lower the resolver. + LowerRange(resolveFirst, resolveLast); + + // That inserted new PUTARG nodes right before the call, so we need to + // legalize the existing call's PUTARG_REG nodes. + MoveCFGCallArgs(call); + + // Finally update the call target + call->gtCallType = CT_INDIRECT; + call->gtFlags &= ~GTF_CALL_VIRT_STUB; + call->gtCallAddr = resolve; + call->gtCallCookie = nullptr; +#ifdef FEATURE_READYTORUN + call->gtEntryPoint.addr = nullptr; + call->gtEntryPoint.accessType = IAT_VALUE; +#endif + + if (callTarget != nullptr) + { + callTarget->SetUnusedValue(); + } + + callTarget = resolve; + } + if (callTarget == nullptr) { assert((call->gtCallType != CT_INDIRECT) && (!call->IsVirtual() || call->IsVirtualStubRelativeIndir())); @@ -3391,7 +3482,7 @@ void Lowering::LowerCFGCall(GenTreeCall* call) cloneConsts = true; #endif - GenTree* indirCellClone; + GenTree* indirCellClone = cloneUse(indirCellArgUse, cloneConsts); if (indirCellArgUse.Def()->OperIs(GT_LCL_VAR) || (cloneConsts && indirCellArgUse.Def()->IsCnsIntOrI())) { @@ -6527,7 +6618,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg. // All we have to do here is add an indirection to generate the actual call target. 
- GenTree* ind = Ind(call->gtCallAddr); + GenTree* ind = comp->gtNewIndir(TYP_I_IMPL, call->gtCallAddr, GTF_IND_NONFAULTING); BlockRange().InsertAfter(call->gtCallAddr, ind); call->gtCallAddr = ind; @@ -6569,7 +6660,7 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) if (!shouldOptimizeVirtualStubCall) { - result = Ind(addr); + result = comp->gtNewIndir(TYP_I_IMPL, addr, GTF_IND_NONFAULTING); } } } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d0bdd858ef7f9d..41dc038208a0f8 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -1076,12 +1076,12 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) } } - // When CFG is enabled and this is a delegate call or vtable call we must + // When CFG is enabled and this is a delegate call or virtual call we must // compute the call target before all late args. However this will // effectively null-check 'this', which should happen only after all // arguments are evaluated. Thus we must evaluate all args with side // effects to a temp. - if (comp->opts.IsCFGEnabled() && (call->IsVirtualVtable() || call->IsDelegateInvoke())) + if (comp->opts.IsCFGEnabled() && (call->IsVirtual() || call->IsDelegateInvoke())) { // Always evaluate 'this' to temp. assert(HasThisPointer()); diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 06444fb31cf956..3d9354b040f48e 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -944,27 +944,6 @@ regNumber genRegArgNext(regNumber argReg) } } -/***************************************************************************** - * - * The following table determines the order in which callee registers - * are encoded in GC information at call sites. - */ - -const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; - -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) -{ - regMaskTP res = 0; - for (int i = 0; i < CNT_CALL_GC_REGS; i++) - { - if ((calleeSaveMask & (1 << i)) != 0) - { - res |= raRbmCalleeSaveOrder[i]; - } - } - return res; -} - /***************************************************************************** * * Initializes the spill code. Should be called once per function compiled. diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index a9728857eb717f..cb4957a6b5f107 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -379,6 +379,11 @@ struct regMaskTP #endif } + static regMaskTP FromIntRegSet(SingleTypeRegSet intRegs) + { + return regMaskTP(intRegs); + } + void operator|=(const regMaskTP& second) { low |= second.getLow(); @@ -1066,16 +1071,6 @@ inline SingleTypeRegSet getSingleTypeRegMask(regNumber reg, var_types regType) return regMask; } -/***************************************************************************** - * - * These arrays list the callee-saved register numbers (and bitmaps, respectively) for - * the current architecture. - */ -extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; - -// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. -regMaskTP genRegMaskFromCalleeSavedMask(unsigned short); - /***************************************************************************** * * Assumes that "reg" is of the given "type". 
Return the next unused reg number after "reg" diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 869bf1944ce5f0..d26ba3057ce3db 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -300,7 +300,6 @@ #ifdef UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 2) #define CNT_CALLEE_TRASH_INT_INIT (9) #define CNT_CALLEE_TRASH_HIGHINT (8) @@ -308,16 +307,12 @@ #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT_INIT (16) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET,RBM_INTRET_1 - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET|RBM_INTRET_1) // For SysV we have more volatile registers so we do not save any callee saves for EnC. #define RBM_ENC_CALLEE_SAVED 0 #else // !UNIX_AMD64_ABI #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) - #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED + 1) #define CNT_CALLEE_TRASH_INT_INIT (7) #define CNT_CALLEE_TRASH_HIGHINT (8) @@ -326,9 +321,6 @@ #define CNT_CALLEE_SAVED_FLOAT (10) #define CNT_CALLEE_TRASH_FLOAT_INIT (6) #define CNT_CALLEE_TRASH_HIGHFLOAT (16) - /* NOTE: Sync with variable name defined in compiler.h */ - #define RBM_CALL_GC_REGS_ORDER RBM_EBX,RBM_ESI,RBM_EDI,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15,RBM_INTRET - #define RBM_CALL_GC_REGS (RBM_EBX|RBM_ESI|RBM_EDI|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_INTRET) // Callee-preserved registers we always save and allow use of for EnC code, since there are quite few volatile registers. #define RBM_ENC_CALLEE_SAVED (RBM_RSI | RBM_RDI) @@ -543,6 +535,8 @@ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. 
   #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
 
+  #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_RAX | RBM_R10 | RBM_R11)
+
   #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R10 | RBM_RCX))
   #define RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL (RBM_INT_CALLEE_TRASH_ALL & ~(RBM_R10 | RBM_RCX))
   #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_RCX
diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h
index 710187e70b0667..95cb19a2291a49 100644
--- a/src/coreclr/jit/targetarm.h
+++ b/src/coreclr/jit/targetarm.h
@@ -89,13 +89,9 @@
   #define RBM_LOW_REGS            (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7)
   #define RBM_HIGH_REGS           (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC)
 
-  #define RBM_CALL_GC_REGS_ORDER  RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11,RBM_INTRET
-  #define RBM_CALL_GC_REGS        (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_INTRET)
-
   #define CNT_CALLEE_SAVED        (8)
   #define CNT_CALLEE_TRASH        (6)
   #define CNT_CALLEE_ENREG        (CNT_CALLEE_SAVED-1)
-  #define CNT_CALL_GC_REGS        (CNT_CALLEE_SAVED+1)
 
   #define CNT_CALLEE_SAVED_FLOAT  (16)
   #define CNT_CALLEE_TRASH_FLOAT  (16)
diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h
index f5b96a2bc6104a..a311b2276cf882 100644
--- a/src/coreclr/jit/targetarm64.h
+++ b/src/coreclr/jit/targetarm64.h
@@ -108,13 +108,9 @@
                            REG_V12, REG_V13, REG_V14, REG_V15, \
                            REG_V3,  REG_V2, REG_V1, REG_V0
 
-  #define RBM_CALL_GC_REGS_ORDER  RBM_R19,RBM_R20,RBM_R21,RBM_R22,RBM_R23,RBM_R24,RBM_R25,RBM_R26,RBM_R27,RBM_R28,RBM_INTRET,RBM_INTRET_1
-  #define RBM_CALL_GC_REGS        (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28|RBM_INTRET|RBM_INTRET_1)
-
   #define CNT_CALLEE_SAVED        (11)
   #define CNT_CALLEE_TRASH        (17)
   #define CNT_CALLEE_ENREG        (CNT_CALLEE_SAVED-1)
-  #define CNT_CALL_GC_REGS        (CNT_CALLEE_SAVED+2)
 
   #define CNT_CALLEE_SAVED_FLOAT  (8)
   #define CNT_CALLEE_TRASH_FLOAT  (24)
@@ -263,6 +259,8 @@
   // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
   #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
 
+  #define RBM_INTERFACELOOKUP_FOR_SLOT_TRASH (RBM_R0 | RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15)
+
   #define RBM_VALIDATE_INDIRECT_CALL_TRASH (RBM_INT_CALLEE_TRASH & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R15))
   #define REG_VALIDATE_INDIRECT_CALL_ADDR REG_R15
   #define REG_DISPATCH_INDIRECT_CALL_ADDR REG_R9
diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h
index 2e46478690e5cc..dd63766d631ac7 100644
--- a/src/coreclr/jit/targetx86.h
+++ b/src/coreclr/jit/targetx86.h
@@ -143,16 +143,9 @@
   #define REG_VAR_ORDER           REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX
   #define MAX_VAR_ORDER_SIZE      6
 
-  // The order here is fixed: it must agree with an order assumed in eetwain...
-  // NB: x86 GC decoder does not report return registers at call sites.
-  #define RBM_CALL_GC_REGS_ORDER  RBM_EDI,RBM_ESI,RBM_EBX,RBM_EBP
-  #define RBM_CALL_GC_REGS        (RBM_EDI|RBM_ESI|RBM_EBX|RBM_EBP)
-
   #define CNT_CALLEE_SAVED        (4)
   #define CNT_CALLEE_TRASH        (3)
   #define CNT_CALLEE_ENREG        (CNT_CALLEE_SAVED-1)
-  // NB: x86 GC decoder does not report return registers at call sites.
- #define CNT_CALL_GC_REGS (CNT_CALLEE_SAVED) #define CNT_CALLEE_SAVED_FLOAT (0) #define CNT_CALLEE_TRASH_FLOAT (6) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 0284daba94d541..8afde6ca76fea3 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -71,8 +71,10 @@ ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) #ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER ASM_OFFSET( 8, 0, InterfaceDispatchCache, m_pCell) #endif +ASM_OFFSET( C, 18, InterfaceDispatchCache, m_cEntries) ASM_OFFSET( 10, 20, InterfaceDispatchCache, m_rgEntries) ASM_SIZEOF( 8, 10, InterfaceDispatchCacheEntry) +ASM_CONST( 3, 3, IDC_CACHE_POINTER_MASK) #endif // Undefine macros that are only used in this header for convenience. diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index ccf197b08f6602..04fe0c7f3e6996 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -213,6 +213,14 @@ list(APPEND RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/WriteBarriers.${ASM_SUFFIX} ) +if(CLR_CMAKE_TARGET_WIN32) + if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + list(APPEND RUNTIME_SOURCES_ARCH_ASM + ${ARCH_SOURCES_DIR}/DispatchResolve.${ASM_SUFFIX} + ) + endif() +endif() + # Add architecture specific folder for looking up headers. convert_to_absolute_path(ARCH_SOURCES_DIR ${ARCH_SOURCES_DIR}) include_directories(${ARCH_SOURCES_DIR}) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index a3643e32f5eaf7..bd7435775479b1 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -258,6 +258,9 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) return false; } +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) +EXTERN_C CODE_LOCATION RhpResolveInterfaceMethodFast; +#endif EXTERN_C CODE_LOCATION RhpInitialInterfaceDispatch; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation1; EXTERN_C CODE_LOCATION RhpInterfaceDispatchAVLocation2; @@ -272,6 +275,9 @@ static bool InInterfaceDispatchHelper(uintptr_t faultingIP) #ifndef USE_PORTABLE_HELPERS static uintptr_t interfaceDispatchAVLocations[] = { +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) + (uintptr_t)&RhpResolveInterfaceMethodFast, +#endif (uintptr_t)&RhpInitialInterfaceDispatch, (uintptr_t)&RhpInterfaceDispatchAVLocation1, (uintptr_t)&RhpInterfaceDispatchAVLocation2, diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 78a39612f907c0..9efd12ebe01853 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -41,6 +41,10 @@ #if defined(FEATURE_DYNAMIC_CODE) EXTERN_C CODE_LOCATION ReturnFromUniversalTransition; EXTERN_C CODE_LOCATION ReturnFromUniversalTransition_DebugStepTailCall; +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) +EXTERN_C CODE_LOCATION ReturnFromUniversalTransitionReturnResult; +EXTERN_C CODE_LOCATION ReturnFromUniversalTransitionReturnResult_DebugStepTailCall; +#endif #endif EXTERN_C CODE_LOCATION RhpCallCatchFunclet2; @@ -2244,7 +2248,13 @@ StackFrameIterator::ReturnAddressCategory StackFrameIterator::CategorizeUnadjust #if defined(FEATURE_DYNAMIC_CODE) if (EQUALS_RETURN_ADDRESS(returnAddress, 
ReturnFromUniversalTransition) || - EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall)) + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransition_DebugStepTailCall) +#if (defined(HOST_AMD64) || defined(HOST_ARM64)) && defined(HOST_WINDOWS) + || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransitionReturnResult) || + EQUALS_RETURN_ADDRESS(returnAddress, ReturnFromUniversalTransitionReturnResult_DebugStepTailCall) +#endif + ) { return InUniversalTransitionThunk; } diff --git a/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm new file mode 100644 index 00000000000000..1dde32405ddc83 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/amd64/DispatchResolve.asm @@ -0,0 +1,63 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +include AsmMacros.inc + + +ifdef FEATURE_CACHED_INTERFACE_DISPATCH + +EXTERN RhpCidResolve : PROC +EXTERN RhpUniversalTransitionReturnResult_DebugStepTailCall : PROC + +;; Fast version of RhpResolveInterfaceMethod +LEAF_ENTRY RhpResolveInterfaceMethodFast, _TEXT + + ;; Load the MethodTable from the object instance in rcx. + ;; Trigger an AV if we're dispatching on a null this. + ;; The exception handling infrastructure is aware of the fact that this is the first + ;; instruction of RhpResolveInterfaceMethodFast and uses it to translate an AV here + ;; to a NullReferenceException at the callsite. + mov rax, [rcx] + + ;; r10 currently contains the indirection cell address. + ;; load r11 to point to the cache block. + mov r11, [r10 + OFFSETOF__InterfaceDispatchCell__m_pCache] + test r11b, IDC_CACHE_POINTER_MASK + jne RhpResolveInterfaceMethodFast_SlowPath + + lea r11, [r11 + OFFSETOF__InterfaceDispatchCache__m_rgEntries] + cmp qword ptr [r11], rax + jne RhpResolveInterfaceMethodFast_Polymorphic + mov rax, qword ptr [r11 + 8] + ret + + RhpResolveInterfaceMethodFast_Polymorphic: + ;; load the count of cache entries into edx + ;; r11 points to the first cache entry so to get to m_cEntries, we need to subtract m_rgEntries first + push rdx + mov edx, dword ptr [r11 - OFFSETOF__InterfaceDispatchCache__m_rgEntries + OFFSETOF__InterfaceDispatchCache__m_cEntries] + + RhpResolveInterfaceMethodFast_NextEntry: + add r11, SIZEOF__InterfaceDispatchCacheEntry + dec edx + jz RhpResolveInterfaceMethodFast_SlowPath_Pop + + cmp qword ptr [r11], rax + jne RhpResolveInterfaceMethodFast_NextEntry + + mov rax, qword ptr [r11 + 8] + pop rdx + ret + + RhpResolveInterfaceMethodFast_SlowPath_Pop: + pop rdx + RhpResolveInterfaceMethodFast_SlowPath: + mov r11, r10 + lea r10, RhpCidResolve + jmp RhpUniversalTransitionReturnResult_DebugStepTailCall + +LEAF_END RhpResolveInterfaceMethodFast, _TEXT + +endif ;; FEATURE_CACHED_INTERFACE_DISPATCH + +end diff --git a/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm b/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm index 777ef9d2de620c..91eaf95a48eda0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/UniversalTransition.asm @@ -84,7 +84,7 @@ DISTANCE_FROM_CHILDSP_TO_CALLERSP equ DISTANCE_FROM_CHILDSP_TO_RET ; everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
; -UNIVERSAL_TRANSITION macro FunctionName +UNIVERSAL_TRANSITION macro FunctionName, ExitSequence NESTED_ENTRY Rhp&FunctionName, _TEXT @@ -146,7 +146,7 @@ ALTERNATE_ENTRY ReturnFrom&FunctionName ; Pop the space that was allocated between the ChildSP and the caller return address. add rsp, DISTANCE_FROM_CHILDSP_TO_RETADDR - TAILJMP_RAX + ExitSequence NESTED_END Rhp&FunctionName, _TEXT @@ -155,8 +155,10 @@ NESTED_END Rhp&FunctionName, _TEXT ; To enable proper step-in behavior in the debugger, we need to have two instances ; of the thunk. For the first one, the debugger steps into the call in the function, ; for the other, it steps over it. - UNIVERSAL_TRANSITION UniversalTransition - UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + UNIVERSAL_TRANSITION UniversalTransition, TAILJMP_RAX + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall, TAILJMP_RAX + UNIVERSAL_TRANSITION UniversalTransitionReturnResult, ret + UNIVERSAL_TRANSITION UniversalTransitionReturnResult_DebugStepTailCall, ret endif diff --git a/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm new file mode 100644 index 00000000000000..92f2f8849e7b11 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/arm64/DispatchResolve.asm @@ -0,0 +1,60 @@ +;; Licensed to the .NET Foundation under one or more agreements. +;; The .NET Foundation licenses this file to you under the MIT license. + +#include "AsmMacros.h" + + TEXTAREA + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + EXTERN RhpCidResolve + EXTERN RhpUniversalTransitionReturnResult_DebugStepTailCall + + NESTED_ENTRY RhpResolveInterfaceMethodFast, _TEXT + + ;; Load the MethodTable from the object instance in x0. + ;; Trigger an AV if we're dispatching on a null this. + ;; The exception handling infrastructure is aware of the fact that this is the first + ;; instruction of RhpResolveInterfaceMethodFast and uses it to translate an AV here + ;; to a NullReferenceException at the callsite. + ldr x12, [x0] + + ;; x11 currently contains the indirection cell address. + ;; load x13 to point to the cache block. 
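+        ;; The low bits of m_pCache are flag bits (IDC_CACHE_POINTER_MASK); if any
+        ;; of them are set, the cell does not hold a real cache pointer yet and we
+        ;; must take the slow path.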
+ ldr x13, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + and x14, x13, #IDC_CACHE_POINTER_MASK + cbnz x14, RhpResolveInterfaceMethodFast_SlowPath + + add x14, x13, #OFFSETOF__InterfaceDispatchCache__m_rgEntries + ldr x15, [x14] + cmp x15, x12 + bne RhpResolveInterfaceMethodFast_Polymorphic + ldur x0, [x14, #8] + ret + +RhpResolveInterfaceMethodFast_Polymorphic + ldr w13, [x13, #OFFSETOF__InterfaceDispatchCache__m_cEntries] + +RhpResolveInterfaceMethodFast_NextEntry + add x14, x14, #SIZEOF__InterfaceDispatchCacheEntry + sub w13, w13, #1 + cmp w13, #0 + beq RhpResolveInterfaceMethodFast_SlowPath + + ldr x15, [x14] + cmp x15, x12 + bne RhpResolveInterfaceMethodFast_NextEntry + + ldur x0, [x14, #8] + ret + +RhpResolveInterfaceMethodFast_SlowPath + ldr xip0, =RhpCidResolve + mov xip1, x11 + b RhpUniversalTransitionReturnResult_DebugStepTailCall + + NESTED_END RhpResolveInterfaceMethodFast + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH + + END \ No newline at end of file diff --git a/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm b/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm index 7d3607f27a01eb..6a843f918f8033 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/UniversalTransition.asm @@ -89,7 +89,7 @@ TEXTAREA MACRO - UNIVERSAL_TRANSITION $FunctionName + UNIVERSAL_TRANSITION $FunctionName, $ReturnResult NESTED_ENTRY Rhp$FunctionName @@ -142,8 +142,14 @@ ;; Restore FP and LR registers, and free the allocated stack block EPILOG_RESTORE_REG_PAIR fp, lr, #STACK_SIZE! + IF $ReturnResult == 0 ;; Tailcall to the target address. EPILOG_NOP br x12 + ELSE + ;; Return target address + EPILOG_NOP mov x0, x12 + ret + ENDIF NESTED_END Rhp$FunctionName @@ -152,7 +158,10 @@ ; To enable proper step-in behavior in the debugger, we need to have two instances ; of the thunk. For the first one, the debugger steps into the call in the function, ; for the other, it steps over it. - UNIVERSAL_TRANSITION UniversalTransition - UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + UNIVERSAL_TRANSITION UniversalTransition, 0 + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall, 0 + UNIVERSAL_TRANSITION UniversalTransitionReturnResult, 1 + UNIVERSAL_TRANSITION UniversalTransitionReturnResult_DebugStepTailCall, 1 + END diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index b33f4376cbd893..319a7b2628f415 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -236,6 +236,8 @@ struct InterfaceDispatchCell } }; +#define IDC_CACHE_POINTER_MASK (InterfaceDispatchCell::Flags::IDC_CachePointerMask) + #endif // FEATURE_CACHED_INTERFACE_DISPATCH #ifdef TARGET_ARM diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 04bca41e476671..7f1697d813953f 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -264,6 +264,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS, // Transition to preemptive mode and track transitions in reverse P/Invoke prolog. 
CORINFO_HELP_GVMLOOKUP_FOR_SLOT, // Resolve a generic virtual method target from this pointer and runtime method handle + CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT, // Resolve a non-generic interface method from this pointer and dispatch cell CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index b4933c28b25afc..8cd04b77136b35 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -761,6 +761,8 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_GVMLOOKUP_FOR_SLOT: id = ReadyToRunHelper.GVMLookupForSlot; break; + case CorInfoHelpFunc.CORINFO_HELP_INTERFACELOOKUP_FOR_SLOT: + return _compilation.NodeFactory.ExternSymbol("RhpResolveInterfaceMethodFast"); case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL: id = ReadyToRunHelper.TypeHandleToRuntimeType; diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 55251c3ec70f99..9c87afc766e2e9 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -459,7 +459,6 @@ NESTED_ENTRY JIT_Patchpoint, _TEXT call JIT_PatchpointWorkerWorkerWithPolicy EPILOG_WITH_TRANSITION_BLOCK_RETURN - TAILJMP_RAX NESTED_END JIT_Patchpoint, _TEXT ; first arg register holds iloffset, which needs to be moved to the second register, and the first register filled with NULL @@ -497,4 +496,4 @@ NESTED_ENTRY InterpreterStub, _TEXT NESTED_END InterpreterStub, _TEXT endif ; FEATURE_INTERPRETER - end \ No newline at end of file + end diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index b533789980c510..8560570fcc1fcc 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -7,6 +7,7 @@ include AsmConstants.inc CHAIN_SUCCESS_COUNTER equ ?g_dispatch_cache_chain_success_counter@@3_KA extern VSD_ResolveWorker:proc + extern VSD_ResolveWorkerForInterfaceLookupSlot:proc extern CHAIN_SUCCESS_COUNTER:dword BACKPATCH_FLAG equ 1 ;; Also known as SDF_ResolveBackPatch in the EE @@ -83,4 +84,24 @@ Fail: LEAF_END ResolveWorkerChainLookupAsmStub, _TEXT +;; On Input: +;; rcx contains object 'this' pointer +;; r11 contains the address of the indirection cell (with the flags in the low bits) +;; +;; Preserves all argument registers +NESTED_ENTRY JIT_InterfaceLookupForSlot, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock + mov rdx, r11 ; indirection cell + + call VSD_ResolveWorkerForInterfaceLookupSlot + + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_ARGUMENT_REGISTERS __PWTB_ArgumentRegisters + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END JIT_InterfaceLookupForSlot, _TEXT + end diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index ec952dd0f6f873..cb754c192d3a29 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -40,6 +40,27 @@ void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegis #undef CALLEE_SAVED_REGISTER } +#ifdef TARGET_WINDOWS +void UpdateRegDisplayFromArgumentRegisters(REGDISPLAY * pRD, ArgumentRegisters* pRegs) +{ + LIMITED_METHOD_CONTRACT; + + // TODO: 
Fix ENUM_ARGUMENT_REGISTERS to have consistent casing for rcx and rdx + + T_CONTEXT * pContext = pRD->pCurrentContext; + pContext->Rcx = pRegs->RCX; + pContext->Rdx = pRegs->RDX; + pContext->R8 = pRegs->R8; + pContext->R9 = pRegs->R9; + + KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers; + pContextPointers->Rcx = (PULONG64)&pRegs->RCX; + pContextPointers->Rdx = (PULONG64)&pRegs->RDX; + pContextPointers->R8 = (PULONG64)&pRegs->R8; + pContextPointers->R9 = (PULONG64)&pRegs->R9; +} +#endif + void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) { LIMITED_METHOD_CONTRACT; @@ -79,6 +100,11 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters()); ClearRegDisplayArgumentAndScratchRegisters(pRD); +#ifdef TARGET_WINDOWS + // TODO: Quick hack + UpdateRegDisplayFromArgumentRegisters(pRD, GetArgumentRegisters()); +#endif + SyncRegDisplayToCurrentContext(pRD); LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay_Impl(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP)); diff --git a/src/coreclr/vm/contractimpl.h b/src/coreclr/vm/contractimpl.h index f8d7d81856f467..1d005d8e6ddddc 100644 --- a/src/coreclr/vm/contractimpl.h +++ b/src/coreclr/vm/contractimpl.h @@ -258,7 +258,6 @@ struct DispatchToken explicit DispatchToken(UINT_PTR token) { - CONSISTENCY_CHECK(token != INVALID_TOKEN); m_token = token; } diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index fe5564e299060d..2edd90acb8c7ae 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -3495,10 +3495,12 @@ HCIMPL1_RAW(void, JIT_ReversePInvokeExit, ReversePInvokeFrame* frame) } HCIMPLEND_RAW -// These two do take args but have a custom calling convention. +// These do take args but have a custom calling convention. 
EXTERN_C void JIT_ValidateIndirectCall(); EXTERN_C void JIT_DispatchIndirectCall(); +EXTERN_C void JIT_InterfaceLookupForSlot(); + //======================================================================== // // JIT HELPERS INITIALIZATION diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index a2bfb6f2c49604..88da6adbee3688 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -1400,6 +1400,99 @@ PCODE VSD_ResolveWorker(TransitionBlock * pTransitionBlock, return target; } +PCODE VSD_ResolveWorkerForInterfaceLookupSlot(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect) +{ + CONTRACTL { + THROWS; + GC_TRIGGERS; + INJECT_FAULT(COMPlusThrowOM();); + PRECONDITION(CheckPointer(pTransitionBlock)); + MODE_COOPERATIVE; + } CONTRACTL_END; + + MAKE_CURRENT_THREAD_AVAILABLE(); + +#ifdef _DEBUG + Thread::ObjectRefFlush(CURRENT_THREAD); +#endif + + StubDispatchFrame frame(pTransitionBlock); + StubDispatchFrame * pSDFrame = &frame; + + PCODE returnAddress = pSDFrame->GetUnadjustedReturnAddress(); + + StubCallSite callSite(siteAddrForRegisterIndirect, returnAddress); + + OBJECTREF *protectedObj = pSDFrame->GetThisPtr(); + _ASSERTE(protectedObj != NULL); + OBJECTREF pObj = *protectedObj; + + PCODE target = (PCODE)NULL; + + bool propagateExceptionToNativeCode = IsCallDescrWorkerInternalReturnAddress(pTransitionBlock->m_ReturnAddress); + + if (pObj == NULL) { + pSDFrame->Push(CURRENT_THREAD); + INSTALL_MANAGED_EXCEPTION_DISPATCHER_EX; + INSTALL_UNWIND_AND_CONTINUE_HANDLER_EX; + COMPlusThrow(kNullReferenceException); + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_EX(propagateExceptionToNativeCode); + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER_EX(propagateExceptionToNativeCode); + _ASSERTE(!"Throw returned"); + } + + pSDFrame->SetCallSite(NULL, (TADDR)callSite.GetIndirectCell()); + + DispatchToken representativeToken = DispatchToken(VirtualCallStubManager::GetTokenFromStub(callSite.GetSiteTarget())); + + MethodTable * pRepresentativeMT = pObj->GetMethodTable(); + if (representativeToken.IsTypedToken()) + { + pRepresentativeMT = AppDomain::GetCurrentDomain()->LookupType(representativeToken.GetTypeID()); + CONSISTENCY_CHECK(CheckPointer(pRepresentativeMT)); + } + + pSDFrame->Push(CURRENT_THREAD); + + INSTALL_MANAGED_EXCEPTION_DISPATCHER_EX; + INSTALL_UNWIND_AND_CONTINUE_HANDLER_EX; + + GCPROTECT_BEGIN(*protectedObj); + + // For Virtual Delegates the m_siteAddr is a field of a managed object + // Thus we have to report it as an interior pointer, + // so that it is updated during a gc + GCPROTECT_BEGININTERIOR( *(callSite.GetIndirectCellAddress()) ); + + GCStress::MaybeTriggerAndProtect(pObj); + + PCODE callSiteTarget = callSite.GetSiteTarget(); + CONSISTENCY_CHECK(callSiteTarget != (PCODE)NULL); + + StubCodeBlockKind stubKind = STUB_CODE_BLOCK_UNKNOWN; + VirtualCallStubManager *pMgr = VirtualCallStubManager::FindStubManager(callSiteTarget, &stubKind); + PREFIX_ASSUME(pMgr != NULL); + + target = pMgr->ResolveWorker(&callSite, protectedObj, representativeToken, stubKind); + +#if _DEBUG + if (pSDFrame->GetGCRefMap() != NULL) + { + GCX_PREEMP(); + _ASSERTE(CheckGCRefMapEqual(pSDFrame->GetGCRefMap(), pSDFrame->GetFunction(), true)); + } +#endif // _DEBUG + + GCPROTECT_END(); + GCPROTECT_END(); + + UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_EX(propagateExceptionToNativeCode); + UNINSTALL_MANAGED_EXCEPTION_DISPATCHER_EX(propagateExceptionToNativeCode); + pSDFrame->Pop(CURRENT_THREAD); + + return target; +} + void 
VirtualCallStubManager::BackPatchWorkerStatic(PCODE returnAddress, TADDR siteAddrForRegisterIndirect)
{
    CONTRACTL {
diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h
index 7638f2aec1eb29..054f146673ea2b 100644
--- a/src/coreclr/vm/virtualcallstub.h
+++ b/src/coreclr/vm/virtualcallstub.h
@@ -46,6 +46,7 @@ extern "C" PCODE STDCALL VSD_ResolveWorker(TransitionBlock * pTransitionBlock,
 #endif
                                            );
 
+extern "C" PCODE STDCALL VSD_ResolveWorkerForInterfaceLookupSlot(TransitionBlock * pTransitionBlock, TADDR siteAddrForRegisterIndirect);
 
 /////////////////////////////////////////////////////////////////////////////////////
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
diff --git a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs
index 5e9189ac90a572..28c4a72e73a23f 100644
--- a/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs
+++ b/src/tests/nativeaot/SmokeTests/ControlFlowGuard/ControlFlowGuard.cs
@@ -35,6 +35,9 @@ static int Main(string[] args)
         // Are we running the control program?
         if (args.Length == 0)
         {
+            TestExceptionFromDispatch.Run();
+            TestInterfaceDispatch.Run();
+
             // Dry run - execute all scenarios while s_armed is false.
             //
             // The replaced call target will not be considered invalid by CFG and none of this
@@ -84,6 +87,120 @@ static int Main(string[] args)
         return 10;
     }
 
+    class TestExceptionFromDispatch
+    {
+        class CastableObject : IDynamicInterfaceCastable
+        {
+            public RuntimeTypeHandle GetInterfaceImplementation(RuntimeTypeHandle interfaceType) => throw new Exception();
+            public bool IsInterfaceImplemented(RuntimeTypeHandle interfaceType, bool throwIfNotImplemented) => true;
+        }
+
+        public static void Run()
+        {
+            bool caughtException = false;
+
+            IDisposable obj = (IDisposable)new CastableObject();
+            try
+            {
+                obj.Dispose();
+            }
+            catch (Exception)
+            {
+                caughtException = true;
+            }
+
+            if (!caughtException)
+                throw new Exception();
+        }
+    }
+
+    internal class TestInterfaceDispatch
+    {
+        interface IFoo
+        {
+            int Call(int x, int y);
+        }
+
+        interface IFoo<T>
+        {
+            int Call(int x, int y);
+        }
+
+        class C1 : IFoo, IFoo<object>
+        {
+            public int Call(int x, int y) => x + y;
+        }
+
+        class C2 : IFoo, IFoo<object>
+        {
+            public int Call(int x, int y) => x - y;
+        }
+        class C3 : IFoo, IFoo<object>
+        {
+            public int Call(int x, int y) => x * y;
+        }
+
+        class C4 : IFoo, IFoo<object>
+        {
+            public int Call(int x, int y) => x / y;
+        }
+
+        class C5 : IFoo, IFoo<object>
+        {
+            public int Call(int x, int y) => x % y;
+        }
+
+        public static void Run()
+        {
+            if (Call(new C1(), 10, 20) != (10 + 20))
+                throw new Exception();
+            if (Call(new C1(), 11, 22) != (11 + 22))
+                throw new Exception();
+            if (Call(new C2(), 10, 20) != (10 - 20))
+                throw new Exception();
+            if (Call(new C2(), 11, 22) != (11 - 22))
+                throw new Exception();
+            if (Call(new C3(), 10, 20) != (10 * 20))
+                throw new Exception();
+            if (Call(new C3(), 11, 22) != (11 * 22))
+                throw new Exception();
+            if (Call(new C4(), 10, 20) != (10 / 20))
+                throw new Exception();
+            if (Call(new C5(), 10, 20) != (10 % 20))
+                throw new Exception();
+
+            if (CallGen(new C1(), 10, 20) != (10 + 20))
+                throw new Exception();
+            if (CallGen(new C2(), 11, 22) != (11 - 22))
+                throw new Exception();
+            if (CallGen(new C3(), 11, 22) != (11 * 22))
+                throw new Exception();
+            if (CallGen(new C4(), 10, 20) != (10 / 20))
+                throw new Exception();
+            if (CallGen(new C5(), 10, 20) != (10 % 20))
+                throw new Exception();
+
+            bool caught = false;
+            try
+            {
+                Call(null, 10, 20);
+            }
+            catch (NullReferenceException)
+            {
+                caught = true;
+            }
+
+            if (!caught)
+                throw new Exception();
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int Call(IFoo f, int x, int y) => f.Call(x, y);
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int CallGen(IFoo<object> f, int x, int y) => f.Call(x, y);
+    }
+
     class TestFunctionPointer
     {
         public static int Run()
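
The dispatch fast path added in the new DispatchResolve.asm files is easier to follow in C-like form. The sketch below mirrors the cache probe the stubs perform; the struct layouts are simplified stand-ins for the real InterfaceDispatchCell/InterfaceDispatchCache declarations in rhbinder.h, and SlowPathResolve is a hypothetical placeholder for the tail call through RhpUniversalTransitionReturnResult into RhpCidResolve:

```cpp
#include <cassert>
#include <cstdint>

constexpr uintptr_t IDC_CACHE_POINTER_MASK = 3; // low flag bits of m_pCache

struct InterfaceDispatchCacheEntry
{
    void* pInstanceType; // MethodTable this entry was recorded for
    void* pTargetCode;   // resolved interface method entry point
};

// Simplified stand-ins for the real declarations in rhbinder.h.
struct InterfaceDispatchCell
{
    uintptr_t m_pStub;  // current dispatch stub
    uintptr_t m_pCache; // cache pointer, or flag bits if the low bits are set
};

struct InterfaceDispatchCache
{
    // ...header fields elided...
    uint32_t m_cEntries;
    InterfaceDispatchCacheEntry m_rgEntries[1]; // variable-length in reality
};

// Hypothetical stand-in for the RhpCidResolve slow path.
void* SlowPathResolve(void* obj, InterfaceDispatchCell* cell)
{
    (void)obj;
    (void)cell;
    return nullptr;
}

void* ResolveInterfaceMethodFast(void* obj, InterfaceDispatchCell* cell)
{
    // A null 'this' faults on this load; the runtime translates an AV at the
    // stub's first instruction into a NullReferenceException at the call site.
    void* pMT = *(void**)obj;

    uintptr_t cache = cell->m_pCache;
    if ((cache & IDC_CACHE_POINTER_MASK) != 0)
    {
        return SlowPathResolve(obj, cell); // no cache has been published yet
    }

    InterfaceDispatchCache* pCache = (InterfaceDispatchCache*)cache;
    for (uint32_t i = 0; i < pCache->m_cEntries; i++)
    {
        if (pCache->m_rgEntries[i].pInstanceType == pMT)
        {
            return pCache->m_rgEntries[i].pTargetCode; // cache hit
        }
    }

    return SlowPathResolve(obj, cell); // miss: grow the cache and resolve
}

int main()
{
    void* fakeMT  = (void*)0x1000;
    void* fakeObj = &fakeMT; // an object's first word is its MethodTable

    InterfaceDispatchCache cache{1, {{fakeMT, (void*)0x2000}}};
    InterfaceDispatchCell  cell{0, (uintptr_t)&cache};

    assert(ResolveInterfaceMethodFast(fakeObj, &cell) == (void*)0x2000);
    return 0;
}
```

Returning the target instead of tail-calling it (as the existing RhpInterfaceDispatch stubs do) is what lets the JIT route the result through its normal indirect-call validation under CFG; see the LowerCFGCall changes above.
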