From 648437b1b4db6536ae33763b6c9178287c0a52bd Mon Sep 17 00:00:00 2001 From: Sergey Andreenko Date: Thu, 21 Jan 2021 13:37:42 -0800 Subject: [PATCH] Arm64 apple vm fixes for arg alignment. (#46665) * Add `MarshalInfo::IsValueClass`. * Add `TypeHandle* pTypeHandle` to `SizeOf`. * Add a few asserts/start using inline function instead of macro. * use TARGET_POINTER_SIZE instead of STACK_ELEM_SIZE. * Use `m_curOfs` instead of `m_idxStack` in `ArgIteratorBase` on all platforms. Before some platforms were using stackSlots, some curOfs in bytes. * Use byte sizes and offsets in `ArgLocDesc`. Fix arm32. x86 fixes. use StackSize on ArgSizes Add `GetStackArgumentByteIndexFromOffset` and return back the old values for asserts. another fix * Stop using `#define STACK_ELEM_SIZE` * Add `isFloatHfa`. * delete checking code. because it won't pass on arm64 apple. * arm64 apple fixes. * roundUp the stack size. * Add a reflection test. * Return byte offset from `GetNextOfs`. It is not a complete fix for arm64 apple, but covers most cases. * Add `FLOAT_REGISTER_SIZE` * Use StackElemSize for ` pLoc->m_byteStackSize`. * replace `assert` with `_ASSERTE`. * Use `ALIGN_UP` in the code that I have changed. * rename `m_curOfs` as `m_ofsStack`. * delete "ceremony " from `StackElemSize`. * Delete `cSlots` and don't call `StackElemSize` on `GetArgSize`. * Fix an assert. * Fix nit. * fix wrong return for hfa. * fix nit. * Fix crossgen job. --- .../classlibnative/bcltype/varargsnative.cpp | 39 ++-- src/coreclr/gcinfo/gcinfoencoder.cpp | 2 + src/coreclr/inc/stdmacros.h | 10 +- src/coreclr/vm/amd64/cgencpu.h | 12 +- src/coreclr/vm/arm/cgencpu.h | 12 +- src/coreclr/vm/arm64/cgencpu.h | 28 ++- src/coreclr/vm/callhelpers.cpp | 3 +- src/coreclr/vm/callingconvention.h | 201 ++++++++++-------- src/coreclr/vm/clrvarargs.cpp | 4 +- src/coreclr/vm/comdelegate.cpp | 70 ++++-- src/coreclr/vm/comtoclrcall.cpp | 21 +- src/coreclr/vm/dllimport.cpp | 11 +- src/coreclr/vm/i386/cgencpu.h | 15 +- src/coreclr/vm/method.hpp | 5 +- src/coreclr/vm/mlinfo.cpp | 41 ++-- src/coreclr/vm/mlinfo.h | 1 + src/coreclr/vm/reflectioninvocation.cpp | 3 +- src/coreclr/vm/siginfo.cpp | 7 +- src/coreclr/vm/siginfo.hpp | 5 +- src/coreclr/vm/typehandle.cpp | 11 + src/coreclr/vm/typehandle.h | 2 + .../regression/manyStackArgs/manyStackArgs.cs | 64 ++++++ .../manyStackArgs/manyStackArgs.csproj | 13 ++ 23 files changed, 385 insertions(+), 195 deletions(-) create mode 100644 src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs create mode 100644 src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj diff --git a/src/coreclr/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/classlibnative/bcltype/varargsnative.cpp index 55dd48bf280ec..a5f206aeef5de 100644 --- a/src/coreclr/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/classlibnative/bcltype/varargsnative.cpp @@ -21,7 +21,7 @@ // pointer to achieve such an alignment for the next argument on those platforms (otherwise it is a no-op). // NOTE: the debugger has its own implementation of this algorithm in Debug\DI\RsType.cpp, CordbType::RequiresAlign8() // so if you change this implementation be sure to update the debugger's version as well. -static void AdjustArgPtrForAlignment(VARARGS *pData, size_t cbArg) +static void AdjustArgPtrForAlignment(VARARGS *pData, unsigned cbArg) { #ifdef TARGET_ARM // Only 64-bit primitives or value types with embedded 64-bit primitives are aligned on 64-bit boundaries. @@ -114,7 +114,9 @@ static void InitCommon(VARARGS *data, VASigCookie** cookie) // which is the start of the first fixed arg (arg1). // Always skip over the varargs_cookie. - data->ArgPtr += StackElemSize(sizeof(LPVOID)); + const bool isValueType = false; + const bool isFloatHfa = false; + data->ArgPtr += StackElemSize(TARGET_POINTER_SIZE, isValueType, isFloatHfa); #endif } @@ -138,9 +140,11 @@ void AdvanceArgPtr(VARARGS *data) break; SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -263,9 +267,11 @@ VarArgsNative::Init2, } SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -401,7 +407,8 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC TypeHandle typehandle = refType->GetType(); _ASSERTE(_this != NULL); - UINT size = 0; + unsigned size = 0; + bool isValueType = false; CorElementType typ = typehandle.GetInternalCorElementType(); if (CorTypeInfo::IsPrimitiveType(typ)) @@ -414,15 +421,15 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC } else if (typ == ELEMENT_TYPE_VALUETYPE) { + isValueType = true; size = typehandle.AsMethodTable()->GetNativeSize(); } else { COMPlusThrow(kNotSupportedException, W("NotSupported_Type")); } - - size = StackElemSize(size); - + const bool isFloatHfa = false; + size = StackElemSize(size, isValueType, isFloatHfa); AdjustArgPtrForAlignment(_this, size); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE @@ -472,9 +479,11 @@ VarArgsNative::GetNextArgHelper( _ASSERTE(data->RemainingArgs != 0); SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); AdjustArgPtrForAlignment(data, cbArg); // Get a pointer to the beginning of the argument. diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index b906c3e147a74..8f56607e22ba5 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -577,8 +577,10 @@ GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcSta _ASSERTE( (flags & (GC_SLOT_IS_REGISTER | GC_SLOT_IS_DELETED)) == 0 ); +#if !defined(OSX_ARM64_ABI) // the spOffset for the stack slot is required to be pointer size aligned _ASSERTE((spOffset % TARGET_POINTER_SIZE) == 0); +#endif m_SlotTable[ m_NumSlots ].Slot.Stack.SpOffset = spOffset; m_SlotTable[ m_NumSlots ].Slot.Stack.Base = spBase; diff --git a/src/coreclr/inc/stdmacros.h b/src/coreclr/inc/stdmacros.h index 6f1f884b2c7ac..f2417c7637545 100644 --- a/src/coreclr/inc/stdmacros.h +++ b/src/coreclr/inc/stdmacros.h @@ -185,17 +185,11 @@ inline size_t ALIGN_UP( size_t val, size_t alignment ) _ASSERTE( result >= val ); // check for overflow return result; } -inline void* ALIGN_UP( void* val, size_t alignment ) -{ - WRAPPER_NO_CONTRACT; - return (void*) ALIGN_UP( (size_t)val, alignment ); -} -inline uint8_t* ALIGN_UP( uint8_t* val, size_t alignment ) +template inline T ALIGN_UP(T val, size_t alignment) { WRAPPER_NO_CONTRACT; - - return (uint8_t*) ALIGN_UP( (size_t)val, alignment ); + return (T)ALIGN_UP((size_t)val, alignment); } inline size_t ALIGN_DOWN( size_t val, size_t alignment ) diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index c7ecc88d2c4d2..7312ad0a019fe 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -101,6 +101,8 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr); #define X86RegFromAMD64Reg(extended_reg) \ ((X86Reg)(((int)extended_reg) & X86_REGISTER_MASK)) +#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes. + // Why is the return value ARG_SLOT? On 64-bit systems, that is 64-bits // and much bigger than necessary for R4, requiring explicit downcasts. inline @@ -145,11 +147,11 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -typedef INT64 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) +{ + const unsigned stackSlotSize = 8; + return ALIGN_UP(parmSize, stackSlotSize); +} //********************************************************************** // Frames diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 3739f4ce5dc66..c68e763e8945d 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -95,15 +95,17 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); // Offset of pc register #define PC_REG_RELATIVE_OFFSET 4 +#define FLOAT_REGISTER_SIZE 4 // each register in FloatArgumentRegisters is 4 bytes. + //********************************************************************** // Parameter size //********************************************************************** -typedef INT32 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) +{ + const unsigned stackSlotSize = 4; + return ALIGN_UP(parmSize, stackSlotSize); +} //********************************************************************** // Frames diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index b28ac4d99c9fe..c87dbb11601f8 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -58,6 +58,8 @@ extern PCODE GetPreStubEntryPoint(); #define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter #define CALLDESCR_RETBUFFARGREG 1 // CallDescrWorker has RetBuffArg parameter that's separate from arg regs +#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes. + // Given a return address retrieved during stackwalk, // this is the offset by which it should be decremented to arrive at the callsite. #define STACKWALK_CONTROLPC_ADJUST_OFFSET 4 @@ -81,13 +83,27 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -typedef INT64 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - -// The expression below assumes STACK_ELEM_SIZE is a power of 2, so check that. -static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); +inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa) +{ +#if defined(OSX_ARM64_ABI) + if (!isValueType) + { + // The primitive types' sizes are expected to be powers of 2. + _ASSERTE((parmSize & (parmSize - 1)) == 0); + // No padding/alignment for primitive types. + return parmSize; + } + if (isFloatHfa) + { + _ASSERTE((parmSize % 4) == 0); + // float hfa is not considered a struct type and passed with 4-byte alignment. + return parmSize; + } +#endif -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) + const unsigned stackSlotSize = 8; + return ALIGN_UP(parmSize, stackSlotSize); +} // // JIT HELPERS. diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp index 492869f1c328f..d5493b661bc32 100644 --- a/src/coreclr/vm/callhelpers.cpp +++ b/src/coreclr/vm/callhelpers.cpp @@ -517,7 +517,8 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT * CallDescrData callDescrData; callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock); - callDescrData.numStackSlots = nStackBytes / STACK_ELEM_SIZE; + _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0); + callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE; #ifdef CALLDESCR_ARGREGS callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters()); #endif diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 129970032a0a4..8a1e934d3d78d 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -39,8 +39,8 @@ struct ArgLocDesc int m_idxGenReg; // First general register used (or -1) int m_cGenReg; // Count of general registers used (or 0) - int m_idxStack; // First stack slot used (or -1) - int m_cStack; // Count of stack slots used (or 0) + int m_byteStackIndex; // Stack offset in bytes (or -1) + int m_byteStackSize; // Stack size in bytes #if defined(UNIX_AMD64_ABI) @@ -85,8 +85,8 @@ struct ArgLocDesc m_cFloatReg = 0; m_idxGenReg = -1; m_cGenReg = 0; - m_idxStack = -1; - m_cStack = 0; + m_byteStackIndex = -1; + m_byteStackSize = 0; #if defined(TARGET_ARM) m_fRequires64BitAlignment = FALSE; #endif @@ -218,14 +218,23 @@ struct TransitionBlock #if defined(UNIX_AMD64_ABI) _ASSERTE(offset != TransitionBlock::StructInRegsOffset); #endif - return (offset - GetOffsetOfArgumentRegisters()) / TARGET_POINTER_SIZE; + offset -= GetOffsetOfArgumentRegisters(); + _ASSERTE((offset % TARGET_POINTER_SIZE) == 0); + return offset / TARGET_POINTER_SIZE; } static UINT GetStackArgumentIndexFromOffset(int offset) { LIMITED_METHOD_CONTRACT; - return (offset - TransitionBlock::GetOffsetOfArgs()) / STACK_ELEM_SIZE; + return (offset - TransitionBlock::GetOffsetOfArgs()) / TARGET_POINTER_SIZE; + } + + static UINT GetStackArgumentByteIndexFromOffset(int offset) + { + LIMITED_METHOD_CONTRACT; + + return (offset - TransitionBlock::GetOffsetOfArgs()); } #ifdef CALLDESCR_FPARGREGS @@ -319,6 +328,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!(m_dwFlags & SIZE_OF_ARG_STACK_COMPUTED)) ForceSigWalk(); _ASSERTE((m_dwFlags & SIZE_OF_ARG_STACK_COMPUTED) != 0); + _ASSERTE((m_nSizeOfArgStack % TARGET_POINTER_SIZE) == 0); return m_nSizeOfArgStack; } @@ -336,7 +346,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE // The argument registers are not included in the stack size on AMD64 size += ARGUMENTREGISTERS_SIZE; #endif - + _ASSERTE((size % TARGET_POINTER_SIZE) == 0); return size; } @@ -595,17 +605,16 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); - int cSlots = (GetArgSize() + 3) / 4; if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - _ASSERTE(cSlots == 1); - pLoc->m_cGenReg = cSlots; + _ASSERTE(GetArgSize() <= TARGET_POINTER_SIZE); + pLoc->m_cGenReg = 1; } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; + pLoc->m_byteStackSize = GetArgSize(); + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); } } #endif @@ -620,12 +629,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->m_fRequires64BitAlignment = m_fRequires64BitAlignment; - int cSlots = (GetArgSize() + 3) / 4; - + const int byteArgSize = GetArgSize(); if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 4; - pLoc->m_cFloatReg = cSlots; + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; + pLoc->m_cFloatReg = ALIGN_UP(byteArgSize, FLOAT_REGISTER_SIZE) / FLOAT_REGISTER_SIZE; return; } @@ -633,22 +643,21 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - if (cSlots <= (4 - pLoc->m_idxGenReg)) + if (byteArgSize <= (4 - pLoc->m_idxGenReg) * TARGET_POINTER_SIZE) { - pLoc->m_cGenReg = cSlots; + pLoc->m_cGenReg = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; } else { pLoc->m_cGenReg = 4 - pLoc->m_idxGenReg; - - pLoc->m_idxStack = 0; - pLoc->m_cStack = cSlots - pLoc->m_cGenReg; + pLoc->m_byteStackIndex = 0; + pLoc->m_byteStackSize = StackElemSize(byteArgSize) - pLoc->m_cGenReg * TARGET_POINTER_SIZE; } } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + pLoc->m_byteStackSize = StackElemSize(byteArgSize); } } #endif // TARGET_ARM @@ -661,16 +670,18 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes. - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16; + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()) { CorInfoHFAElemType type = m_argTypeHandle.GetHFAType(); pLoc->setHFAFieldSize(type); - pLoc->m_cFloatReg = GetArgSize()/pLoc->m_hfaFieldSize; + pLoc->m_cFloatReg = GetArgSize() / pLoc->m_hfaFieldSize; } else @@ -680,29 +691,31 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE return; } - int cSlots = (GetArgSize() + 7)/ 8; + unsigned byteArgSize = GetArgSize(); + // Question: why do not arm and x86 have similar checks? // Composites greater than 16 bytes are passed by reference - if (GetArgType() == ELEMENT_TYPE_VALUETYPE && GetArgSize() > ENREGISTERED_PARAMTYPE_MAXSIZE) + if ((GetArgType() == ELEMENT_TYPE_VALUETYPE) && (byteArgSize > ENREGISTERED_PARAMTYPE_MAXSIZE)) { - cSlots = 1; + byteArgSize = TARGET_POINTER_SIZE; } -#ifdef TARGET_ARM64 + // Sanity check to make sure no caller is trying to get an ArgLocDesc that // describes the return buffer reg field that's in the TransitionBlock. _ASSERTE(argOffset != TransitionBlock::GetOffsetOfRetBuffArgReg()); -#endif if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - pLoc->m_cGenReg = cSlots; + pLoc->m_cGenReg = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;; } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); + const bool isFloatHfa = (isValueType && !m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()); + pLoc->m_byteStackSize = StackElemSize(byteArgSize, isValueType, isFloatHfa); } } #endif // TARGET_ARM64 @@ -734,8 +747,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #if defined(UNIX_AMD64_ABI) if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes. - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16; + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; pLoc->m_cFloatReg = 1; } else @@ -758,13 +772,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - int argOnStackSize; + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + int argSizeInBytes; if (IsArgPassedByRef()) - argOnStackSize = STACK_ELEM_SIZE; + argSizeInBytes = TARGET_POINTER_SIZE; else - argOnStackSize = GetArgSize(); - pLoc->m_cStack = (argOnStackSize + STACK_ELEM_SIZE - 1) / STACK_ELEM_SIZE; + argSizeInBytes = GetArgSize(); + pLoc->m_byteStackSize = StackElemSize(argSizeInBytes); } } #endif // TARGET_AMD64 @@ -784,36 +798,29 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE bool m_hasArgLocDescForStructInRegs; #endif // (TARGET_AMD64 && UNIX_AMD64_ABI) || TARGET_ARM64 + int m_ofsStack; // Current position of the stack iterator, in bytes + #ifdef TARGET_X86 - int m_curOfs; // Current position of the stack iterator int m_numRegistersUsed; #ifdef FEATURE_INTERPRETER bool m_fUnmanagedCallConv; #endif #endif -#ifdef TARGET_AMD64 #ifdef UNIX_AMD64_ABI int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value int m_idxFPReg; // Next floating point register to be assigned a value bool m_fArgInRegisters; // Indicates that the current argument is stored in registers -#else - int m_curOfs; // Current position of the stack iterator -#endif #endif #ifdef TARGET_ARM int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value - WORD m_wFPRegs; // Bitmask of available floating point argument registers (s0-s15/d0-d7) bool m_fRequires64BitAlignment; // Cached info about the current arg #endif #ifdef TARGET_ARM64 int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value int m_idxFPReg; // Next FP register to be assigned a value #endif @@ -1023,7 +1030,7 @@ int ArgIteratorTemplate::GetNextOffset() case IMAGE_CEE_CS_CALLCONV_C: case IMAGE_CEE_CS_CALLCONV_STDCALL: m_numRegistersUsed = NUM_ARGUMENT_REGISTERS; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); m_fUnmanagedCallConv = true; break; @@ -1034,30 +1041,30 @@ int ArgIteratorTemplate::GetNextOffset() default: m_fUnmanagedCallConv = false; m_numRegistersUsed = numRegistersUsed; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); break; } #else m_numRegistersUsed = numRegistersUsed; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); #endif #elif defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_ofsStack = 0; m_idxFPReg = 0; #else - m_curOfs = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); #endif #elif defined(TARGET_ARM) m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_ofsStack = 0; m_wFPRegs = 0; #elif defined(TARGET_ARM64) m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_ofsStack = 0; m_idxFPReg = 0; #else @@ -1090,8 +1097,8 @@ int ArgIteratorTemplate::GetNextOffset() #ifdef FEATURE_INTERPRETER if (m_fUnmanagedCallConv) { - int argOfs = m_curOfs; - m_curOfs += StackElemSize(argSize); + int argOfs = m_ofsStack; + m_ofsStack += StackElemSize(argSize); return argOfs; } #endif @@ -1100,9 +1107,9 @@ int ArgIteratorTemplate::GetNextOffset() return TransitionBlock::GetOffsetOfArgumentRegisters() + (NUM_ARGUMENT_REGISTERS - m_numRegistersUsed) * sizeof(void *); } - m_curOfs -= StackElemSize(argSize); - _ASSERTE(m_curOfs >= TransitionBlock::GetOffsetOfArgs()); - return m_curOfs; + m_ofsStack -= StackElemSize(argSize); + _ASSERTE(m_ofsStack >= TransitionBlock::GetOffsetOfArgs()); + return m_ofsStack; #elif defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI @@ -1195,16 +1202,15 @@ int ArgIteratorTemplate::GetNextOffset() m_fArgInRegisters = false; - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * STACK_ELEM_SIZE; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; - int cArgSlots = cbArg / STACK_ELEM_SIZE; - m_idxStack += cArgSlots; + m_ofsStack += cbArg; return argOfs; #else // Each argument takes exactly one slot on AMD64 on Windows - int argOfs = m_curOfs; - m_curOfs += sizeof(void *); + int argOfs = m_ofsStack; + m_ofsStack += sizeof(void *); return argOfs; #endif #elif defined(TARGET_ARM) @@ -1266,7 +1272,7 @@ int ArgIteratorTemplate::GetNextOffset() m_fRequires64BitAlignment = fRequiresAlign64Bit; int cbArg = StackElemSize(argSize); - int cArgSlots = cbArg / 4; + _ASSERTE((cbArg % TARGET_POINTER_SIZE) == 0); // Ignore floating point argument placement in registers if we're dealing with a vararg function (the ABI // specifies this so that vararg processing on the callee side is simplified). @@ -1313,13 +1319,15 @@ int ArgIteratorTemplate::GetNextOffset() // Doubles or HFAs containing doubles need the stack aligned appropriately. if (fRequiresAlign64Bit) - m_idxStack = (int)ALIGN_UP(m_idxStack, 2); + { + m_ofsStack = (int)ALIGN_UP(m_ofsStack, TARGET_POINTER_SIZE * 2); + } // Indicate the stack location of the argument to the caller. - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 4; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; // Record the stack usage. - m_idxStack += cArgSlots; + m_ofsStack += cbArg; return argOfs; } @@ -1341,10 +1349,10 @@ int ArgIteratorTemplate::GetNextOffset() int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 4; int cRemainingRegs = 4 - m_idxGenReg; - if (cArgSlots <= cRemainingRegs) + if (cbArg <= cRemainingRegs * TARGET_POINTER_SIZE) { // Mark the registers just allocated as used. - m_idxGenReg += cArgSlots; + m_idxGenReg += ALIGN_UP(cbArg, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; return argOfs; } @@ -1355,9 +1363,9 @@ int ArgIteratorTemplate::GetNextOffset() m_idxGenReg = 4; - if (m_idxStack == 0) + if (m_ofsStack == 0) { - m_idxStack += cArgSlots - cRemainingRegs; + m_ofsStack += cbArg - cRemainingRegs * TARGET_POINTER_SIZE; return argOfs; } } @@ -1366,13 +1374,13 @@ int ArgIteratorTemplate::GetNextOffset() { // The argument requires 64-bit alignment. If it is going to be passed on the stack, align // the next stack slot. See step C.6 in the algorithm in the ABI spec. - m_idxStack = (int)ALIGN_UP(m_idxStack, 2); + m_ofsStack = (int)ALIGN_UP(m_ofsStack, TARGET_POINTER_SIZE * 2); } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 4; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; // Advance the stack pointer over the argument just placed. - m_idxStack += cArgSlots; + m_ofsStack += cbArg; return argOfs; #elif defined(TARGET_ARM64) @@ -1428,10 +1436,8 @@ int ArgIteratorTemplate::GetNextOffset() default: break; } - - int cbArg = StackElemSize(argSize); - int cArgSlots = cbArg / STACK_ELEM_SIZE; - + const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); + const int cbArg = StackElemSize(argSize, isValueType, thValueType.IsFloatHfa()); if (cFPRegs>0 && !this->IsVarArg()) { if (cFPRegs + m_idxFPReg <= 8) @@ -1448,13 +1454,17 @@ int ArgIteratorTemplate::GetNextOffset() } else { +#if !defined(OSX_ARM64_ABI) + _ASSERTE((cbArg% TARGET_POINTER_SIZE) == 0); +#endif + const int regSlots = ALIGN_UP(cbArg, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; // Only x0-x7 are valid argument registers (x8 is always the return buffer) - if (m_idxGenReg + cArgSlots <= 8) + if (m_idxGenReg + regSlots <= 8) { // The entirety of the arg fits in the register slots. int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - m_idxGenReg += cArgSlots; + m_idxGenReg += regSlots; return argOfs; } else @@ -1467,9 +1477,9 @@ int ArgIteratorTemplate::GetNextOffset() // into x0-x7, and any remaining stack arguments are placed normally. int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - // Increase m_idxStack to account for the space used for the remainder of the arg after - // register slots are filled. - m_idxStack += (m_idxGenReg + cArgSlots - 8); + // Increase m_ofsStack to account for the space used for the remainder of the arg after + // registers are filled. + m_ofsStack += cbArg + (m_idxGenReg - 8) * TARGET_POINTER_SIZE; // We used up the remaining reg slots. m_idxGenReg = 8; @@ -1485,8 +1495,8 @@ int ArgIteratorTemplate::GetNextOffset() } } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 8; - m_idxStack += cArgSlots; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; + m_ofsStack += cbArg; return argOfs; #else PORTABILITY_ASSERT("ArgIteratorTemplate::GetNextOffset"); @@ -1731,13 +1741,17 @@ void ArgIteratorTemplate::ForceSigWalk() #else // UNIX_AMD64_ABI // All stack arguments take just one stack slot on AMD64 because of arguments bigger // than a stack slot are passed by reference. - stackElemSize = STACK_ELEM_SIZE; + stackElemSize = TARGET_POINTER_SIZE; #endif // UNIX_AMD64_ABI #else // TARGET_AMD64 - stackElemSize = StackElemSize(GetArgSize()); + + TypeHandle thValueType; + const CorElementType argType = GetArgType(&thValueType); + const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); + stackElemSize = StackElemSize(GetArgSize(), isValueType, thValueType.IsFloatHfa()); #if defined(ENREGISTERED_PARAMTYPE_MAXSIZE) if (IsArgPassedByRef()) - stackElemSize = STACK_ELEM_SIZE; + stackElemSize = TARGET_POINTER_SIZE; #endif #endif // TARGET_AMD64 @@ -1771,6 +1785,9 @@ void ArgIteratorTemplate::ForceSigWalk() #endif // TARGET_X86 + // arg stack size is rounded to the pointer size on all platforms. + nSizeOfArgStack = (int)ALIGN_UP(nSizeOfArgStack, TARGET_POINTER_SIZE); + // Cache the result m_nSizeOfArgStack = nSizeOfArgStack; m_dwFlags |= SIZE_OF_ARG_STACK_COMPUTED; diff --git a/src/coreclr/vm/clrvarargs.cpp b/src/coreclr/vm/clrvarargs.cpp index 303220b4464a6..37c30739aedf5 100644 --- a/src/coreclr/vm/clrvarargs.cpp +++ b/src/coreclr/vm/clrvarargs.cpp @@ -76,7 +76,9 @@ VARARGS::MarshalToUnmanagedVaList( case ELEMENT_TYPE_U: case ELEMENT_TYPE_PTR: { - DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType)); + const bool isValueType = false; + const bool isFloatHfa = false; + DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType), isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (cbSize > ENREGISTERED_PARAMTYPE_MAXSIZE) diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 7a52f6fdedc32..35baf178c3924 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -75,8 +75,8 @@ class ShuffleIterator int m_currentGenRegIndex; // Current floating point register index (relative to the ArgLocDesc::m_idxFloatReg) int m_currentFloatRegIndex; - // Current stack slot index (relative to the ArgLocDesc::m_idxStack) - int m_currentStackSlotIndex; + // Current byte stack index (relative to the ArgLocDesc::m_byteStackIndex) + int m_currentByteStackIndex; #if defined(UNIX_AMD64_ABI) // Get next shuffle offset for struct passed in registers. There has to be at least one offset left. @@ -134,7 +134,7 @@ class ShuffleIterator #endif m_currentGenRegIndex(0), m_currentFloatRegIndex(0), - m_currentStackSlotIndex(0) + m_currentByteStackIndex(0) { } @@ -143,11 +143,13 @@ class ShuffleIterator { return (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) || (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) || - (m_currentStackSlotIndex < m_argLocDesc->m_cStack); + (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize); } // Get next offset to shuffle. There has to be at least one offset left. - UINT16 GetNextOfs() + // For register arguments it returns regNum | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK. + // For stack arguments it returns stack offset in bytes with negative sign. + int GetNextOfs() { int index; @@ -157,7 +159,9 @@ class ShuffleIterator EEClass* eeClass = m_argLocDesc->m_eeClass; if (m_argLocDesc->m_eeClass != 0) { - return GetNextOfsInStruct(); + index = GetNextOfsInStruct(); + _ASSERT((index & ShuffleEntry::REGMASK) != 0); + return index; } #endif // UNIX_AMD64_ABI @@ -167,7 +171,7 @@ class ShuffleIterator index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex; m_currentFloatRegIndex++; - return (UINT16)index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK; + return index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK; } // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot @@ -177,15 +181,16 @@ class ShuffleIterator index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex; m_currentGenRegIndex++; - return (UINT16)index | ShuffleEntry::REGMASK; + return index | ShuffleEntry::REGMASK; } // If we get here we must have at least one stack slot left to shuffle (this method should only be called // when AnythingToShuffle(pArg) == true). - if (m_currentStackSlotIndex < m_argLocDesc->m_cStack) - { - index = m_argLocDesc->m_idxStack + m_currentStackSlotIndex; - m_currentStackSlotIndex++; + if (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) + { + const unsigned byteIndex = m_argLocDesc->m_byteStackIndex + m_currentByteStackIndex; + index = byteIndex / TARGET_POINTER_SIZE; + m_currentByteStackIndex += TARGET_POINTER_SIZE; // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. if (index >= ShuffleEntry::REGMASK) @@ -193,7 +198,7 @@ class ShuffleIterator COMPlusThrow(kNotSupportedException); } - return (UINT16)index; + return -(int)byteIndex; } // There are no more offsets to get, the caller should not have called us @@ -262,13 +267,39 @@ BOOL AddNextShuffleEntryToArray(ArgLocDesc sArgSrc, ArgLocDesc sArgDst, SArrayAppend(entry); } } @@ -605,13 +637,13 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArrayAppend(entry); - cbSize -= STACK_ELEM_SIZE; + cbSize -= TARGET_POINTER_SIZE; } while (cbSize > 0); } diff --git a/src/coreclr/vm/comtoclrcall.cpp b/src/coreclr/vm/comtoclrcall.cpp index 6a68c2c2009d1..f1d11c2be076d 100644 --- a/src/coreclr/vm/comtoclrcall.cpp +++ b/src/coreclr/vm/comtoclrcall.cpp @@ -226,7 +226,7 @@ inline static void InvokeStub(ComCallMethodDesc *pCMD, PCODE pManagedTarget, OBJ INT_PTR dangerousThis; *(OBJECTREF *)&dangerousThis = orThis; - DWORD dwStackSlots = pCMD->GetNumStackBytes() / STACK_ELEM_SIZE; + DWORD dwStackSlots = pCMD->GetNumStackBytes() / TARGET_POINTER_SIZE; // Managed code is generally "THROWS" and we have no exception handler here that the contract system can // see. We ensure that we don't get exceptions here by generating a try/catch in the IL stub that covers @@ -827,7 +827,7 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) NewArrayHolder pwStubStackSlotOffsets; UINT16 *pOutputStack = NULL; - UINT16 wStubStackSlotCount = static_cast(dwArgStack) / STACK_ELEM_SIZE; + UINT16 wStubStackSlotCount = static_cast(dwArgStack) / TARGET_POINTER_SIZE; if (wStubStackSlotCount > 0) { pwStubStackSlotOffsets = new UINT16[wStubStackSlotCount]; @@ -843,15 +843,15 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) if (!pStubMD->IsStatic()) { numRegistersUsed++; - wInputStack += STACK_ELEM_SIZE; + wInputStack += TARGET_POINTER_SIZE; } // process the return buffer parameter if (argit.HasRetBuffArg()) { numRegistersUsed++; - wSourceSlotEDX = wInputStack / STACK_ELEM_SIZE; - wInputStack += STACK_ELEM_SIZE; + wSourceSlotEDX = wInputStack / TARGET_POINTER_SIZE; + wInputStack += TARGET_POINTER_SIZE; } // process ordinary parameters @@ -864,17 +864,18 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) if (ArgIterator::IsArgumentInRegister(&numRegistersUsed, type, thValueType)) { - wSourceSlotEDX = wInputStack / STACK_ELEM_SIZE; - wInputStack += STACK_ELEM_SIZE; + wSourceSlotEDX = wInputStack / TARGET_POINTER_SIZE; + wInputStack += TARGET_POINTER_SIZE; } else { // we may need more stack slots for larger parameters - pOutputStack -= StackElemSize(cbSize) / STACK_ELEM_SIZE; - for (UINT slot = 0; slot < (StackElemSize(cbSize) / STACK_ELEM_SIZE); slot++) + UINT slotsCount = StackElemSize(cbSize) / TARGET_POINTER_SIZE; + pOutputStack -= slotsCount; + for (UINT slot = 0; slot < slotsCount; slot++) { pOutputStack[slot] = wInputStack; - wInputStack += STACK_ELEM_SIZE; + wInputStack += TARGET_POINTER_SIZE; } } } diff --git a/src/coreclr/vm/dllimport.cpp b/src/coreclr/vm/dllimport.cpp index afc31052b69ed..ea3ec6fbebba8 100644 --- a/src/coreclr/vm/dllimport.cpp +++ b/src/coreclr/vm/dllimport.cpp @@ -3230,7 +3230,8 @@ BOOL NDirect::MarshalingRequired( #endif if (i > 0) { - dwStackSize += StackElemSize(hndArgType.GetSize()); + const bool isValueType = true; + dwStackSize += StackElemSize(hndArgType.GetSize(), isValueType, hndArgType.IsFloatHfa()); } break; } @@ -3247,7 +3248,13 @@ BOOL NDirect::MarshalingRequired( { if (CorTypeInfo::IsPrimitiveType(type) || type == ELEMENT_TYPE_FNPTR) { - if (i > 0) dwStackSize += StackElemSize(CorTypeInfo::Size(type)); + + if (i > 0) + { + const bool isValueType = false; + const bool isFloatHfa = false; + dwStackSize += StackElemSize(CorTypeInfo::Size(type), isValueType, isFloatHfa); + } } else { diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 7a0ef55aec80a..04de3f584ae9e 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -102,19 +102,14 @@ EXTERN_C void SinglecastDelegateInvokeStub(); // Parameter size //********************************************************************** -typedef INT32 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - - +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) +{ + const unsigned stackSlotSize = 4; + return ALIGN_UP(parmSize, stackSlotSize); +} #include "stublinkerx86.h" - - -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) - - //********************************************************************** // Frames //********************************************************************** diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index b20bad978e55b..24b8080fcc322 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -2758,7 +2758,10 @@ class DynamicMethodDesc : public StoredSigMethodDesc void SetNativeStackArgSize(WORD cbArgSize) { LIMITED_METHOD_CONTRACT; - _ASSERTE(IsILStub() && (cbArgSize % TARGET_POINTER_SIZE) == 0); + _ASSERTE(IsILStub()); +#if !defined(OSX_ARM64_ABI) + _ASSERTE((cbArgSize % TARGET_POINTER_SIZE) == 0); +#endif m_dwExtendedFlags = (m_dwExtendedFlags & ~nomdStackArgSize) | ((DWORD)cbArgSize << 16); } diff --git a/src/coreclr/vm/mlinfo.cpp b/src/coreclr/vm/mlinfo.cpp index 1097bbea2a6ad..71788db9efd5d 100644 --- a/src/coreclr/vm/mlinfo.cpp +++ b/src/coreclr/vm/mlinfo.cpp @@ -2873,18 +2873,27 @@ void MarshalInfo::SetupArgumentSizes() } CONTRACTL_END; + const unsigned targetPointerSize = TARGET_POINTER_SIZE; + const bool pointerIsValueType = false; + const bool pointerIsFloatHfa = false; + _ASSERTE(targetPointerSize == StackElemSize(TARGET_POINTER_SIZE, pointerIsValueType, pointerIsFloatHfa)); + if (m_byref) { - m_nativeArgSize = StackElemSize(TARGET_POINTER_SIZE); + m_nativeArgSize = targetPointerSize; } else { - m_nativeArgSize = StackElemSize(GetNativeSize(m_type)); + const bool isValueType = IsValueClass(m_type); + const bool isFloatHfa = isValueType && (m_pMT->GetHFAType() == CORINFO_HFA_ELEM_FLOAT); + m_nativeArgSize = StackElemSize(GetNativeSize(m_type), isValueType, isFloatHfa); } #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (m_nativeArgSize > ENREGISTERED_PARAMTYPE_MAXSIZE) - m_nativeArgSize = StackElemSize(TARGET_POINTER_SIZE); + { + m_nativeArgSize = targetPointerSize; + } #endif // ENREGISTERED_PARAMTYPE_MAXSIZE } @@ -2909,16 +2918,8 @@ UINT16 MarshalInfo::GetNativeSize(MarshalType mtype) if (nativeSize == VARIABLESIZE) { - switch (mtype) - { - case MARSHAL_TYPE_BLITTABLEVALUECLASS: - case MARSHAL_TYPE_VALUECLASS: - case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR: - return (UINT16) m_pMT->GetNativeSize(); - - default: - _ASSERTE(0); - } + _ASSERTE(IsValueClass(mtype)); + return (UINT16) m_pMT->GetNativeSize(); } return nativeSize; @@ -2945,6 +2946,20 @@ bool MarshalInfo::IsInOnly(MarshalType mtype) return ILMarshalerIsInOnly[mtype]; } +bool MarshalInfo::IsValueClass(MarshalType mtype) +{ + switch (mtype) + { + case MARSHAL_TYPE_BLITTABLEVALUECLASS: + case MARSHAL_TYPE_VALUECLASS: + case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR: + return true; + + default: + return false; + } +} + OVERRIDEPROC MarshalInfo::GetArgumentOverrideProc(MarshalType mtype) { CONTRACTL diff --git a/src/coreclr/vm/mlinfo.h b/src/coreclr/vm/mlinfo.h index 792d078220f56..3124c8ce5cf7c 100644 --- a/src/coreclr/vm/mlinfo.h +++ b/src/coreclr/vm/mlinfo.h @@ -490,6 +490,7 @@ class MarshalInfo UINT16 GetNativeSize(MarshalType mtype); static bool IsInOnly(MarshalType mtype); + static bool IsValueClass(MarshalType mtype); static OVERRIDEPROC GetArgumentOverrideProc(MarshalType mtype); static RETURNOVERRIDEPROC GetReturnOverrideProc(MarshalType mtype); diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp index c4267ae151f36..3491dcb348944 100644 --- a/src/coreclr/vm/reflectioninvocation.cpp +++ b/src/coreclr/vm/reflectioninvocation.cpp @@ -828,7 +828,8 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod, CallDescrData callDescrData; callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock); - callDescrData.numStackSlots = nStackBytes / STACK_ELEM_SIZE; + _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0); + callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE; #ifdef CALLDESCR_ARGREGS callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters()); #endif diff --git a/src/coreclr/vm/siginfo.cpp b/src/coreclr/vm/siginfo.cpp index 3f26bc8c87ac1..ca64386ae217b 100644 --- a/src/coreclr/vm/siginfo.cpp +++ b/src/coreclr/vm/siginfo.cpp @@ -2598,7 +2598,7 @@ UINT MetaSig::GetElemSize(CorElementType etype, TypeHandle thValueType) // Returns size of that element in bytes. This is the minimum size that a // field of this type would occupy inside an object. // -UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext) const +UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext, TypeHandle* pTypeHandle) const { CONTRACTL { @@ -2613,9 +2613,8 @@ UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext) con } CONTRACTL_END - TypeHandle thValueType; - CorElementType etype = PeekElemTypeNormalized(pModule, pTypeContext, &thValueType); - return MetaSig::GetElemSize(etype, thValueType); + CorElementType etype = PeekElemTypeNormalized(pModule, pTypeContext, pTypeHandle); + return MetaSig::GetElemSize(etype, *pTypeHandle); } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/siginfo.hpp b/src/coreclr/vm/siginfo.hpp index 276a26bbe77fd..e638463a28918 100644 --- a/src/coreclr/vm/siginfo.hpp +++ b/src/coreclr/vm/siginfo.hpp @@ -197,7 +197,7 @@ class SigPointer : public SigParser // Returns size of that element in bytes. This is the minimum size that a // field of this type would occupy inside an object. //------------------------------------------------------------------------ - UINT SizeOf(Module* pModule, const SigTypeContext *pTypeContext) const; + UINT SizeOf(Module* pModule, const SigTypeContext *pTypeContext, TypeHandle* pTypeHandle) const; private: @@ -856,7 +856,8 @@ class MetaSig { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; - return m_pRetType.SizeOf(m_pModule, &m_typeContext); + TypeHandle thValueType; + return m_pRetType.SizeOf(m_pModule, &m_typeContext, &thValueType); } //------------------------------------------------------------------ diff --git a/src/coreclr/vm/typehandle.cpp b/src/coreclr/vm/typehandle.cpp index 8b494f07eb2d0..b198c87467c92 100644 --- a/src/coreclr/vm/typehandle.cpp +++ b/src/coreclr/vm/typehandle.cpp @@ -388,6 +388,7 @@ Instantiation TypeHandle::GetInstantiation() const BOOL TypeHandle::IsValueType() const { LIMITED_METHOD_DAC_CONTRACT; + _ASSERTE(!IsNull()); if (!IsTypeDesc()) return AsMethodTable()->IsValueType(); else return AsTypeDesc()->IsNativeValueType(); @@ -433,6 +434,16 @@ CorInfoHFAElemType TypeHandle::GetHFAType() const return CORINFO_HFA_ELEM_NONE; } +bool TypeHandle::IsFloatHfa() const +{ + WRAPPER_NO_CONTRACT; + if (IsNull() || !IsHFA()) + { + return false; + } + return (GetHFAType() == CORINFO_HFA_ELEM_FLOAT); +} + #ifdef FEATURE_64BIT_ALIGNMENT bool TypeHandle::RequiresAlign8() const diff --git a/src/coreclr/vm/typehandle.h b/src/coreclr/vm/typehandle.h index 387a75b362138..e623d4a8cec84 100644 --- a/src/coreclr/vm/typehandle.h +++ b/src/coreclr/vm/typehandle.h @@ -373,6 +373,8 @@ class TypeHandle bool IsHFA() const; CorInfoHFAElemType GetHFAType() const; + bool IsFloatHfa() const; + #ifdef FEATURE_64BIT_ALIGNMENT bool RequiresAlign8() const; #endif // FEATURE_64BIT_ALIGNMENT diff --git a/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs new file mode 100644 index 0000000000000..11171e15c4088 --- /dev/null +++ b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +public class TestSmallStackArgsClass +{ + public TestSmallStackArgsClass() + { } + + [MethodImpl(MethodImplOptions.NoInlining)] + public int TestSmallStackArgsMethod(short s1, short s2, short s3, short s4, short s5, short s6, short s7, short s8, short s9, short s10, short s11, short s12) + { + if (s1 != 1 || s2 != 2 || s3 != 3 || s4 != 4 || s5 != 5 || s6 != 6 || s7 != 7 || s8 != 8 || s9 != 9 || s10 != 10 || s11 != 11 || s12 != 12) + { + Console.WriteLine("small stack arguments do not match."); + Console.WriteLine("Expected: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12"); + Console.WriteLine("Actual: " + s1 + ", " + s2 + ", " + s3 + ", " + s4 + ", " + s5 + ", " + s6 + ", " + s7 + ", " + s8 + ", " + s9 + ", " + s10 + ", " + s11 + ", " + s12); + return 101; + } + return 100; + } +} + +public class TestMethodInfo +{ + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int InvokeReflection(object testClassObject, MethodInfo testMethod, object[] args) + { + + object retValue = testMethod.Invoke(testClassObject, args); + int r = Convert.ToInt32(retValue); + return r; + } + + + public static int Main() + { + Type testClass = Type.GetType("TestSmallStackArgsClass"); + ConstructorInfo testConstructor = testClass.GetConstructor(Type.EmptyTypes); + object testClassObject = testConstructor.Invoke(new object[] { }); + + MethodInfo testMethod = testClass.GetMethod("TestSmallStackArgsMethod"); + var args = new object[12]; + for (short i = 0; i < 12; ++i) + { + args[i] = (short)(i + 1); + } + + int r = InvokeReflection(testClassObject, testMethod, args); + + if (r != 100) + { + Console.WriteLine("The test failed"); + return 101; + } + Console.WriteLine("The test passed"); + return 100; + } +} diff --git a/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj new file mode 100644 index 0000000000000..adc3b7203ae20 --- /dev/null +++ b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj @@ -0,0 +1,13 @@ + + + Exe + 0 + + + None + True + + + + +