From 7b1024cb70f96d01c58b6a60235f5a8464127e6e Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 5 Jan 2021 11:06:30 -0800 Subject: [PATCH 01/25] Add `MarshalInfo::IsValueClass`. --- src/coreclr/vm/mlinfo.cpp | 26 ++++++++++++++++---------- src/coreclr/vm/mlinfo.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/coreclr/vm/mlinfo.cpp b/src/coreclr/vm/mlinfo.cpp index 1097bbea2a6ad..2b550d5ce7859 100644 --- a/src/coreclr/vm/mlinfo.cpp +++ b/src/coreclr/vm/mlinfo.cpp @@ -2909,16 +2909,8 @@ UINT16 MarshalInfo::GetNativeSize(MarshalType mtype) if (nativeSize == VARIABLESIZE) { - switch (mtype) - { - case MARSHAL_TYPE_BLITTABLEVALUECLASS: - case MARSHAL_TYPE_VALUECLASS: - case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR: - return (UINT16) m_pMT->GetNativeSize(); - - default: - _ASSERTE(0); - } + _ASSERTE(IsValueClass(mtype)); + return (UINT16) m_pMT->GetNativeSize(); } return nativeSize; @@ -2945,6 +2937,20 @@ bool MarshalInfo::IsInOnly(MarshalType mtype) return ILMarshalerIsInOnly[mtype]; } +bool MarshalInfo::IsValueClass(MarshalType mtype) +{ + switch (mtype) + { + case MARSHAL_TYPE_BLITTABLEVALUECLASS: + case MARSHAL_TYPE_VALUECLASS: + case MARSHAL_TYPE_BLITTABLEVALUECLASSWITHCOPYCTOR: + return true; + + default: + return false; + } +} + OVERRIDEPROC MarshalInfo::GetArgumentOverrideProc(MarshalType mtype) { CONTRACTL diff --git a/src/coreclr/vm/mlinfo.h b/src/coreclr/vm/mlinfo.h index 792d078220f56..3124c8ce5cf7c 100644 --- a/src/coreclr/vm/mlinfo.h +++ b/src/coreclr/vm/mlinfo.h @@ -490,6 +490,7 @@ class MarshalInfo UINT16 GetNativeSize(MarshalType mtype); static bool IsInOnly(MarshalType mtype); + static bool IsValueClass(MarshalType mtype); static OVERRIDEPROC GetArgumentOverrideProc(MarshalType mtype); static RETURNOVERRIDEPROC GetReturnOverrideProc(MarshalType mtype); From 30c0a33f8103b2aab95727a7306011e38c9a1bb4 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 5 Jan 2021 14:10:53 -0800 Subject: [PATCH 02/25] Add `TypeHandle* pTypeHandle` to `SizeOf`. --- .../classlibnative/bcltype/varargsnative.cpp | 20 +++++++++---------- src/coreclr/vm/siginfo.cpp | 7 +++---- src/coreclr/vm/siginfo.hpp | 5 +++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/coreclr/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/classlibnative/bcltype/varargsnative.cpp index 55dd48bf280ec..13ba26566ac06 100644 --- a/src/coreclr/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/classlibnative/bcltype/varargsnative.cpp @@ -138,9 +138,9 @@ void AdvanceArgPtr(VARARGS *data) break; SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType); + unsigned cbArg = StackElemSize(cbRaw); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -263,9 +263,9 @@ VarArgsNative::Init2, } SigTypeContext typeContext; // This is an empty type context. 
This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); + unsigned cbArg = StackElemSize(cbRaw); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -401,7 +401,7 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC TypeHandle typehandle = refType->GetType(); _ASSERTE(_this != NULL); - UINT size = 0; + unsigned size = 0; CorElementType typ = typehandle.GetInternalCorElementType(); if (CorTypeInfo::IsPrimitiveType(typ)) @@ -472,9 +472,9 @@ VarArgsNative::GetNextArgHelper( _ASSERTE(data->RemainingArgs != 0); SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic - SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext); - SIZE_T cbArg = StackElemSize(cbRaw); - + TypeHandle thValueType; + unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); + unsigned cbArg = StackElemSize(cbRaw); AdjustArgPtrForAlignment(data, cbArg); // Get a pointer to the beginning of the argument. diff --git a/src/coreclr/vm/siginfo.cpp b/src/coreclr/vm/siginfo.cpp index 3f26bc8c87ac1..ca64386ae217b 100644 --- a/src/coreclr/vm/siginfo.cpp +++ b/src/coreclr/vm/siginfo.cpp @@ -2598,7 +2598,7 @@ UINT MetaSig::GetElemSize(CorElementType etype, TypeHandle thValueType) // Returns size of that element in bytes. This is the minimum size that a // field of this type would occupy inside an object. // -UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext) const +UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext, TypeHandle* pTypeHandle) const { CONTRACTL { @@ -2613,9 +2613,8 @@ UINT SigPointer::SizeOf(Module* pModule, const SigTypeContext *pTypeContext) con } CONTRACTL_END - TypeHandle thValueType; - CorElementType etype = PeekElemTypeNormalized(pModule, pTypeContext, &thValueType); - return MetaSig::GetElemSize(etype, thValueType); + CorElementType etype = PeekElemTypeNormalized(pModule, pTypeContext, pTypeHandle); + return MetaSig::GetElemSize(etype, *pTypeHandle); } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/siginfo.hpp b/src/coreclr/vm/siginfo.hpp index 276a26bbe77fd..e638463a28918 100644 --- a/src/coreclr/vm/siginfo.hpp +++ b/src/coreclr/vm/siginfo.hpp @@ -197,7 +197,7 @@ class SigPointer : public SigParser // Returns size of that element in bytes. This is the minimum size that a // field of this type would occupy inside an object. //------------------------------------------------------------------------ - UINT SizeOf(Module* pModule, const SigTypeContext *pTypeContext) const; + UINT SizeOf(Module* pModule, const SigTypeContext *pTypeContext, TypeHandle* pTypeHandle) const; private: @@ -856,7 +856,8 @@ class MetaSig { WRAPPER_NO_CONTRACT; SUPPORTS_DAC; - return m_pRetType.SizeOf(m_pModule, &m_typeContext); + TypeHandle thValueType; + return m_pRetType.SizeOf(m_pModule, &m_typeContext, &thValueType); } //------------------------------------------------------------------ From 44c85e8619b99587608785e5c8cd3596e0a9fbf2 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 6 Jan 2021 14:04:01 -0800 Subject: [PATCH 03/25] Add a few asserts/start using inline function instead of macro. 
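For reviewers: all four cgencpu.h headers get the same replacement, only the underlying STACK_ELEM_SIZE differs (8 bytes on amd64/arm64, 4 bytes on x86/arm). Roughly, the new shape is the following; this is an illustrative restatement of the hunks below, not additional code:

    // Round a parameter size up to a whole number of stack slots.
    inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false)
    {
        // The mask trick below only works when STACK_ELEM_SIZE is a power of 2,
        // which is now checked at compile time instead of being a comment.
        static_assert((STACK_ELEM_SIZE & (STACK_ELEM_SIZE - 1)) == 0,
                      "STACK_ELEM_SIZE must be a power of 2");
        return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1);
    }

Compared with the macro, the inline function evaluates parmSize exactly once, is visible to the debugger, verifies the power-of-two assumption on every architecture, and lets later patches add parameters (isValueType here, isFloatHfa in patch 08) without rewriting the expression at every call site.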
--- .../classlibnative/bcltype/varargsnative.cpp | 24 +++++++++++-------- src/coreclr/gcinfo/gcinfoencoder.cpp | 2 ++ src/coreclr/vm/amd64/cgencpu.h | 8 +++++-- src/coreclr/vm/arm/cgencpu.h | 8 +++++-- src/coreclr/vm/arm64/cgencpu.h | 10 ++++---- src/coreclr/vm/callhelpers.cpp | 3 ++- src/coreclr/vm/callingconvention.h | 15 ++++++++---- src/coreclr/vm/clrvarargs.cpp | 3 ++- src/coreclr/vm/dllimport.cpp | 10 ++++++-- src/coreclr/vm/i386/cgencpu.h | 13 +++++----- src/coreclr/vm/method.hpp | 5 +++- src/coreclr/vm/mlinfo.cpp | 12 +++++++--- src/coreclr/vm/reflectioninvocation.cpp | 3 ++- src/coreclr/vm/typehandle.cpp | 1 + 14 files changed, 78 insertions(+), 39 deletions(-) diff --git a/src/coreclr/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/classlibnative/bcltype/varargsnative.cpp index 13ba26566ac06..5a1c755cde9ac 100644 --- a/src/coreclr/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/classlibnative/bcltype/varargsnative.cpp @@ -21,7 +21,7 @@ // pointer to achieve such an alignment for the next argument on those platforms (otherwise it is a no-op). // NOTE: the debugger has its own implementation of this algorithm in Debug\DI\RsType.cpp, CordbType::RequiresAlign8() // so if you change this implementation be sure to update the debugger's version as well. -static void AdjustArgPtrForAlignment(VARARGS *pData, size_t cbArg) +static void AdjustArgPtrForAlignment(VARARGS *pData, unsigned cbArg) { #ifdef TARGET_ARM // Only 64-bit primitives or value types with embedded 64-bit primitives are aligned on 64-bit boundaries. @@ -114,7 +114,8 @@ static void InitCommon(VARARGS *data, VASigCookie** cookie) // which is the start of the first fixed arg (arg1). // Always skip over the varargs_cookie. - data->ArgPtr += StackElemSize(sizeof(LPVOID)); + const bool isValueType = false; + data->ArgPtr += StackElemSize(sizeof(LPVOID), isValueType); #endif } @@ -139,8 +140,9 @@ void AdvanceArgPtr(VARARGS *data) SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic TypeHandle thValueType; - unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType); - unsigned cbArg = StackElemSize(cbRaw); + const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + unsigned cbArg = StackElemSize(cbRaw, isValueType); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -265,7 +267,8 @@ VarArgsNative::Init2, SigTypeContext typeContext; // This is an empty type context. 
This is OK because the vararg methods may not be generic TypeHandle thValueType; unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); - unsigned cbArg = StackElemSize(cbRaw); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + unsigned cbArg = StackElemSize(cbRaw, isValueType); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -402,6 +405,7 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC _ASSERTE(_this != NULL); unsigned size = 0; + bool isValueType = false; CorElementType typ = typehandle.GetInternalCorElementType(); if (CorTypeInfo::IsPrimitiveType(typ)) @@ -414,15 +418,14 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC } else if (typ == ELEMENT_TYPE_VALUETYPE) { + isValueType = true; size = typehandle.AsMethodTable()->GetNativeSize(); } else { COMPlusThrow(kNotSupportedException, W("NotSupported_Type")); } - - size = StackElemSize(size); - + size = StackElemSize(size, isValueType); AdjustArgPtrForAlignment(_this, size); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE @@ -473,8 +476,9 @@ VarArgsNative::GetNextArgHelper( SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic TypeHandle thValueType; - unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); - unsigned cbArg = StackElemSize(cbRaw); + const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); + const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); + unsigned cbArg = StackElemSize(cbRaw, isValueType); AdjustArgPtrForAlignment(data, cbArg); // Get a pointer to the beginning of the argument. diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index b906c3e147a74..8f56607e22ba5 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -577,8 +577,10 @@ GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcSta _ASSERTE( (flags & (GC_SLOT_IS_REGISTER | GC_SLOT_IS_DELETED)) == 0 ); +#if !defined(OSX_ARM64_ABI) // the spOffset for the stack slot is required to be pointer size aligned _ASSERTE((spOffset % TARGET_POINTER_SIZE) == 0); +#endif m_SlotTable[ m_NumSlots ].Slot.Stack.SpOffset = spOffset; m_SlotTable[ m_NumSlots ].Slot.Stack.Base = spBase; diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index c7ecc88d2c4d2..d47003974f60b 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -148,8 +148,12 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) typedef INT64 StackElemType; #define STACK_ELEM_SIZE sizeof(StackElemType) -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) +{ + // The next expression assumes STACK_ELEM_SIZE is a power of 2. 
+ static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); +} //********************************************************************** // Frames diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 3739f4ce5dc66..f121746c9eb5a 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -102,8 +102,12 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); typedef INT32 StackElemType; #define STACK_ELEM_SIZE sizeof(StackElemType) -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) +{ + // The next expression assumes STACK_ELEM_SIZE is a power of 2. + static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); +} //********************************************************************** // Frames diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index b28ac4d99c9fe..866e6390b749e 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -84,10 +84,12 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) typedef INT64 StackElemType; #define STACK_ELEM_SIZE sizeof(StackElemType) -// The expression below assumes STACK_ELEM_SIZE is a power of 2, so check that. -static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); - -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType) +{ + // The expression below assumes STACK_ELEM_SIZE is a power of 2, so check that. + static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return ((parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); +} // // JIT HELPERS. 
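A note for reviewers rather than part of the diff: the rounding performed by the inline helpers above boils down to the stand-alone check below. RoundToStackSlot is a hypothetical local copy of the 8-byte-slot case (amd64/arm64); with 4-byte slots (x86/arm) the same inputs give 0, 4, 8 and 12.

    #include <cassert>

    // Hypothetical stand-alone copy of the rounding, 8-byte stack slots.
    static unsigned RoundToStackSlot(unsigned parmSize)
    {
        const unsigned slotSize = 8;
        return (parmSize + slotSize - 1) & ~(slotSize - 1);
    }

    int main()
    {
        assert(RoundToStackSlot(0)  == 0);
        assert(RoundToStackSlot(1)  == 8);   // a 1-byte arg still occupies a full slot
        assert(RoundToStackSlot(8)  == 8);
        assert(RoundToStackSlot(12) == 16);  // rounds up to the next multiple of 8
        return 0;
    }
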
diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp index 492869f1c328f..d5493b661bc32 100644 --- a/src/coreclr/vm/callhelpers.cpp +++ b/src/coreclr/vm/callhelpers.cpp @@ -517,7 +517,8 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT * CallDescrData callDescrData; callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock); - callDescrData.numStackSlots = nStackBytes / STACK_ELEM_SIZE; + _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0); + callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE; #ifdef CALLDESCR_ARGREGS callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters()); #endif diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 129970032a0a4..4f8261a37ba07 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -218,7 +218,9 @@ struct TransitionBlock #if defined(UNIX_AMD64_ABI) _ASSERTE(offset != TransitionBlock::StructInRegsOffset); #endif - return (offset - GetOffsetOfArgumentRegisters()) / TARGET_POINTER_SIZE; + offset -= GetOffsetOfArgumentRegisters(); + _ASSERTE((offset % TARGET_POINTER_SIZE) == 0); + return offset / TARGET_POINTER_SIZE; } static UINT GetStackArgumentIndexFromOffset(int offset) @@ -319,6 +321,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!(m_dwFlags & SIZE_OF_ARG_STACK_COMPUTED)) ForceSigWalk(); _ASSERTE((m_dwFlags & SIZE_OF_ARG_STACK_COMPUTED) != 0); + _ASSERTE((m_nSizeOfArgStack % TARGET_POINTER_SIZE) == 0); return m_nSizeOfArgStack; } @@ -336,7 +339,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE // The argument registers are not included in the stack size on AMD64 size += ARGUMENTREGISTERS_SIZE; #endif - + _ASSERTE((size % TARGET_POINTER_SIZE) == 0); return size; } @@ -1428,9 +1431,10 @@ int ArgIteratorTemplate::GetNextOffset() default: break; } + const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); + const int cbArg = StackElemSize(argSize, isValueType); + const int cArgSlots = cbArg / STACK_ELEM_SIZE; - int cbArg = StackElemSize(argSize); - int cArgSlots = cbArg / STACK_ELEM_SIZE; if (cFPRegs>0 && !this->IsVarArg()) { @@ -1734,7 +1738,8 @@ void ArgIteratorTemplate::ForceSigWalk() stackElemSize = STACK_ELEM_SIZE; #endif // UNIX_AMD64_ABI #else // TARGET_AMD64 - stackElemSize = StackElemSize(GetArgSize()); + const bool isValueType = (GetArgType() == ELEMENT_TYPE_VALUETYPE); + stackElemSize = StackElemSize(GetArgSize(), isValueType); #if defined(ENREGISTERED_PARAMTYPE_MAXSIZE) if (IsArgPassedByRef()) stackElemSize = STACK_ELEM_SIZE; diff --git a/src/coreclr/vm/clrvarargs.cpp b/src/coreclr/vm/clrvarargs.cpp index 303220b4464a6..33bec19e93e4e 100644 --- a/src/coreclr/vm/clrvarargs.cpp +++ b/src/coreclr/vm/clrvarargs.cpp @@ -76,7 +76,8 @@ VARARGS::MarshalToUnmanagedVaList( case ELEMENT_TYPE_U: case ELEMENT_TYPE_PTR: { - DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType)); + const bool isValueType = false; + DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType), isValueType); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (cbSize > ENREGISTERED_PARAMTYPE_MAXSIZE) diff --git a/src/coreclr/vm/dllimport.cpp b/src/coreclr/vm/dllimport.cpp index afc31052b69ed..ff6b4982333ca 100644 --- a/src/coreclr/vm/dllimport.cpp +++ b/src/coreclr/vm/dllimport.cpp @@ -3230,7 +3230,8 @@ BOOL NDirect::MarshalingRequired( #endif if (i > 0) { - dwStackSize += StackElemSize(hndArgType.GetSize()); + const bool isValueType = true; + dwStackSize 
+= StackElemSize(hndArgType.GetSize(), isValueType); } break; } @@ -3247,7 +3248,12 @@ BOOL NDirect::MarshalingRequired( { if (CorTypeInfo::IsPrimitiveType(type) || type == ELEMENT_TYPE_FNPTR) { - if (i > 0) dwStackSize += StackElemSize(CorTypeInfo::Size(type)); + + if (i > 0) + { + const bool isValueType = false; + dwStackSize += StackElemSize(CorTypeInfo::Size(type), isValueType); + } } else { diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 7a0ef55aec80a..054059a263a2a 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -105,16 +105,15 @@ EXTERN_C void SinglecastDelegateInvokeStub(); typedef INT32 StackElemType; #define STACK_ELEM_SIZE sizeof(StackElemType) - +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */) +{ + // The next expression assumes STACK_ELEM_SIZE is a power of 2. + static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); +} #include "stublinkerx86.h" - - -// !! This expression assumes STACK_ELEM_SIZE is a power of 2. -#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1))) - - //********************************************************************** // Frames //********************************************************************** diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index b20bad978e55b..24b8080fcc322 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -2758,7 +2758,10 @@ class DynamicMethodDesc : public StoredSigMethodDesc void SetNativeStackArgSize(WORD cbArgSize) { LIMITED_METHOD_CONTRACT; - _ASSERTE(IsILStub() && (cbArgSize % TARGET_POINTER_SIZE) == 0); + _ASSERTE(IsILStub()); +#if !defined(OSX_ARM64_ABI) + _ASSERTE((cbArgSize % TARGET_POINTER_SIZE) == 0); +#endif m_dwExtendedFlags = (m_dwExtendedFlags & ~nomdStackArgSize) | ((DWORD)cbArgSize << 16); } diff --git a/src/coreclr/vm/mlinfo.cpp b/src/coreclr/vm/mlinfo.cpp index 2b550d5ce7859..acd3f170b68c0 100644 --- a/src/coreclr/vm/mlinfo.cpp +++ b/src/coreclr/vm/mlinfo.cpp @@ -2873,18 +2873,24 @@ void MarshalInfo::SetupArgumentSizes() } CONTRACTL_END; + const unsigned targetPointerSize = TARGET_POINTER_SIZE; + const bool pointerIsValueType = false; + _ASSERTE(targetPointerSize == StackElemSize(TARGET_POINTER_SIZE, pointerIsValueType)); + if (m_byref) { - m_nativeArgSize = StackElemSize(TARGET_POINTER_SIZE); + m_nativeArgSize = targetPointerSize; } else { - m_nativeArgSize = StackElemSize(GetNativeSize(m_type)); + m_nativeArgSize = StackElemSize(GetNativeSize(m_type), IsValueClass(m_type)); } #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (m_nativeArgSize > ENREGISTERED_PARAMTYPE_MAXSIZE) - m_nativeArgSize = StackElemSize(TARGET_POINTER_SIZE); + { + m_nativeArgSize = targetPointerSize; + } #endif // ENREGISTERED_PARAMTYPE_MAXSIZE } diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp index c4267ae151f36..3491dcb348944 100644 --- a/src/coreclr/vm/reflectioninvocation.cpp +++ b/src/coreclr/vm/reflectioninvocation.cpp @@ -828,7 +828,8 @@ FCIMPL5(Object*, RuntimeMethodHandle::InvokeMethod, CallDescrData callDescrData; callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock); - callDescrData.numStackSlots = nStackBytes / STACK_ELEM_SIZE; + _ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0); + callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE; #ifdef 
CALLDESCR_ARGREGS callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters()); #endif diff --git a/src/coreclr/vm/typehandle.cpp b/src/coreclr/vm/typehandle.cpp index 8b494f07eb2d0..b27feaec2b0f3 100644 --- a/src/coreclr/vm/typehandle.cpp +++ b/src/coreclr/vm/typehandle.cpp @@ -388,6 +388,7 @@ Instantiation TypeHandle::GetInstantiation() const BOOL TypeHandle::IsValueType() const { LIMITED_METHOD_DAC_CONTRACT; + _ASSERTE(!IsNull()); if (!IsTypeDesc()) return AsMethodTable()->IsValueType(); else return AsTypeDesc()->IsNativeValueType(); From 68242187ee1051a0fb426cfacd3b9fa0579e1a01 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 6 Jan 2021 14:43:39 -0800 Subject: [PATCH 04/25] use TARGET_POINTER_SIZE instead of STACK_ELEM_SIZE. --- src/coreclr/vm/callingconvention.h | 6 +++--- src/coreclr/vm/comdelegate.cpp | 6 +++--- src/coreclr/vm/comtoclrcall.cpp | 21 +++++++++++---------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 4f8261a37ba07..5e6cfeb0517f8 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -764,7 +764,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); int argOnStackSize; if (IsArgPassedByRef()) - argOnStackSize = STACK_ELEM_SIZE; + argOnStackSize = TARGET_POINTER_SIZE; else argOnStackSize = GetArgSize(); pLoc->m_cStack = (argOnStackSize + STACK_ELEM_SIZE - 1) / STACK_ELEM_SIZE; @@ -1198,9 +1198,9 @@ int ArgIteratorTemplate::GetNextOffset() m_fArgInRegisters = false; - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * STACK_ELEM_SIZE; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * TARGET_POINTER_SIZE; - int cArgSlots = cbArg / STACK_ELEM_SIZE; + int cArgSlots = cbArg / TARGET_POINTER_SIZE; m_idxStack += cArgSlots; return argOfs; diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 7a52f6fdedc32..e0c7f179fd60a 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -605,13 +605,13 @@ VOID GenerateShuffleArray(MethodDesc* pInvoke, MethodDesc *pTargetMeth, SArrayAppend(entry); - cbSize -= STACK_ELEM_SIZE; + cbSize -= TARGET_POINTER_SIZE; } while (cbSize > 0); } diff --git a/src/coreclr/vm/comtoclrcall.cpp b/src/coreclr/vm/comtoclrcall.cpp index 6a68c2c2009d1..f1d11c2be076d 100644 --- a/src/coreclr/vm/comtoclrcall.cpp +++ b/src/coreclr/vm/comtoclrcall.cpp @@ -226,7 +226,7 @@ inline static void InvokeStub(ComCallMethodDesc *pCMD, PCODE pManagedTarget, OBJ INT_PTR dangerousThis; *(OBJECTREF *)&dangerousThis = orThis; - DWORD dwStackSlots = pCMD->GetNumStackBytes() / STACK_ELEM_SIZE; + DWORD dwStackSlots = pCMD->GetNumStackBytes() / TARGET_POINTER_SIZE; // Managed code is generally "THROWS" and we have no exception handler here that the contract system can // see. 
We ensure that we don't get exceptions here by generating a try/catch in the IL stub that covers @@ -827,7 +827,7 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) NewArrayHolder pwStubStackSlotOffsets; UINT16 *pOutputStack = NULL; - UINT16 wStubStackSlotCount = static_cast(dwArgStack) / STACK_ELEM_SIZE; + UINT16 wStubStackSlotCount = static_cast(dwArgStack) / TARGET_POINTER_SIZE; if (wStubStackSlotCount > 0) { pwStubStackSlotOffsets = new UINT16[wStubStackSlotCount]; @@ -843,15 +843,15 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) if (!pStubMD->IsStatic()) { numRegistersUsed++; - wInputStack += STACK_ELEM_SIZE; + wInputStack += TARGET_POINTER_SIZE; } // process the return buffer parameter if (argit.HasRetBuffArg()) { numRegistersUsed++; - wSourceSlotEDX = wInputStack / STACK_ELEM_SIZE; - wInputStack += STACK_ELEM_SIZE; + wSourceSlotEDX = wInputStack / TARGET_POINTER_SIZE; + wInputStack += TARGET_POINTER_SIZE; } // process ordinary parameters @@ -864,17 +864,18 @@ void ComCallMethodDesc::InitRuntimeNativeInfo(MethodDesc *pStubMD) if (ArgIterator::IsArgumentInRegister(&numRegistersUsed, type, thValueType)) { - wSourceSlotEDX = wInputStack / STACK_ELEM_SIZE; - wInputStack += STACK_ELEM_SIZE; + wSourceSlotEDX = wInputStack / TARGET_POINTER_SIZE; + wInputStack += TARGET_POINTER_SIZE; } else { // we may need more stack slots for larger parameters - pOutputStack -= StackElemSize(cbSize) / STACK_ELEM_SIZE; - for (UINT slot = 0; slot < (StackElemSize(cbSize) / STACK_ELEM_SIZE); slot++) + UINT slotsCount = StackElemSize(cbSize) / TARGET_POINTER_SIZE; + pOutputStack -= slotsCount; + for (UINT slot = 0; slot < slotsCount; slot++) { pOutputStack[slot] = wInputStack; - wInputStack += STACK_ELEM_SIZE; + wInputStack += TARGET_POINTER_SIZE; } } } From 2ce2c530f919eda2f7abc2fe4983e8a8b0b45a52 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 6 Jan 2021 16:49:04 -0800 Subject: [PATCH 05/25] Use `m_curOfs` instead of `m_idxStack` in `ArgIteratorBase` on all platforms. Before some platforms were using stackSlots, some curOfs in bytes. --- src/coreclr/vm/callingconvention.h | 73 +++++++++++++++--------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 5e6cfeb0517f8..1785f2a3e05ef 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -683,19 +683,21 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE return; } - int cSlots = (GetArgSize() + 7)/ 8; - + const unsigned argSizeInBytes = GetArgSize(); + unsigned cSlots; // Composites greater than 16 bytes are passed by reference - if (GetArgType() == ELEMENT_TYPE_VALUETYPE && GetArgSize() > ENREGISTERED_PARAMTYPE_MAXSIZE) + if ((GetArgType() == ELEMENT_TYPE_VALUETYPE) && (argSizeInBytes > ENREGISTERED_PARAMTYPE_MAXSIZE)) { cSlots = 1; } + else + { + cSlots = (argSizeInBytes + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + } -#ifdef TARGET_ARM64 // Sanity check to make sure no caller is trying to get an ArgLocDesc that // describes the return buffer reg field that's in the TransitionBlock. 
_ASSERTE(argOffset != TransitionBlock::GetOffsetOfRetBuffArgReg()); -#endif if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { @@ -795,28 +797,22 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #endif #endif -#ifdef TARGET_AMD64 + int m_curOfs; // Current position of the stack iterator, in bytes + #ifdef UNIX_AMD64_ABI int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value int m_idxFPReg; // Next floating point register to be assigned a value bool m_fArgInRegisters; // Indicates that the current argument is stored in registers -#else - int m_curOfs; // Current position of the stack iterator -#endif #endif #ifdef TARGET_ARM int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value - WORD m_wFPRegs; // Bitmask of available floating point argument registers (s0-s15/d0-d7) bool m_fRequires64BitAlignment; // Cached info about the current arg #endif #ifdef TARGET_ARM64 int m_idxGenReg; // Next general register to be assigned a value - int m_idxStack; // Next stack slot to be assigned a value int m_idxFPReg; // Next FP register to be assigned a value #endif @@ -1048,19 +1044,19 @@ int ArgIteratorTemplate::GetNextOffset() #elif defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_curOfs = 0; m_idxFPReg = 0; #else m_curOfs = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); #endif #elif defined(TARGET_ARM) m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_curOfs = 0; m_wFPRegs = 0; #elif defined(TARGET_ARM64) m_idxGenReg = numRegistersUsed; - m_idxStack = 0; + m_curOfs = 0; m_idxFPReg = 0; #else @@ -1198,10 +1194,10 @@ int ArgIteratorTemplate::GetNextOffset() m_fArgInRegisters = false; - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * TARGET_POINTER_SIZE; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; int cArgSlots = cbArg / TARGET_POINTER_SIZE; - m_idxStack += cArgSlots; + m_curOfs += cbArg; return argOfs; #else @@ -1269,7 +1265,7 @@ int ArgIteratorTemplate::GetNextOffset() m_fRequires64BitAlignment = fRequiresAlign64Bit; int cbArg = StackElemSize(argSize); - int cArgSlots = cbArg / 4; + assert((cbArg % TARGET_POINTER_SIZE) == 0); // Ignore floating point argument placement in registers if we're dealing with a vararg function (the ABI // specifies this so that vararg processing on the callee side is simplified). @@ -1316,13 +1312,15 @@ int ArgIteratorTemplate::GetNextOffset() // Doubles or HFAs containing doubles need the stack aligned appropriately. if (fRequiresAlign64Bit) - m_idxStack = (int)ALIGN_UP(m_idxStack, 2); + { + m_curOfs = (int)ALIGN_UP(m_curOfs, TARGET_POINTER_SIZE * 2); + } // Indicate the stack location of the argument to the caller. - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 4; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; // Record the stack usage. - m_idxStack += cArgSlots; + m_curOfs += cbArg; return argOfs; } @@ -1358,9 +1356,9 @@ int ArgIteratorTemplate::GetNextOffset() m_idxGenReg = 4; - if (m_idxStack == 0) + if (m_curOfs == 0) { - m_idxStack += cArgSlots - cRemainingRegs; + m_curOfs += cbArg - cRemainingRegs * TARGET_POINTER_SIZE; return argOfs; } } @@ -1369,13 +1367,13 @@ int ArgIteratorTemplate::GetNextOffset() { // The argument requires 64-bit alignment. If it is going to be passed on the stack, align // the next stack slot. See step C.6 in the algorithm in the ABI spec. 
- m_idxStack = (int)ALIGN_UP(m_idxStack, 2); + m_curOfs = (int)ALIGN_UP(m_curOfs, TARGET_POINTER_SIZE * 2); } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 4; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; // Advance the stack pointer over the argument just placed. - m_idxStack += cArgSlots; + m_curOfs += cbArg; return argOfs; #elif defined(TARGET_ARM64) @@ -1433,9 +1431,6 @@ int ArgIteratorTemplate::GetNextOffset() } const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); const int cbArg = StackElemSize(argSize, isValueType); - const int cArgSlots = cbArg / STACK_ELEM_SIZE; - - if (cFPRegs>0 && !this->IsVarArg()) { if (cFPRegs + m_idxFPReg <= 8) @@ -1452,13 +1447,17 @@ int ArgIteratorTemplate::GetNextOffset() } else { +#if !defined(OSX_ARM64_ABI) + _ASSERTE((cbArg% TARGET_POINTER_SIZE) == 0); +#endif + const int regSlots = (cbArg + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; // Only x0-x7 are valid argument registers (x8 is always the return buffer) - if (m_idxGenReg + cArgSlots <= 8) + if (m_idxGenReg + regSlots <= 8) { // The entirety of the arg fits in the register slots. int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - m_idxGenReg += cArgSlots; + m_idxGenReg += regSlots; return argOfs; } else @@ -1471,9 +1470,9 @@ int ArgIteratorTemplate::GetNextOffset() // into x0-x7, and any remaining stack arguments are placed normally. int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - // Increase m_idxStack to account for the space used for the remainder of the arg after - // register slots are filled. - m_idxStack += (m_idxGenReg + cArgSlots - 8); + // Increase m_curOfs to account for the space used for the remainder of the arg after + // registers are filled. + m_curOfs += cbArg + (m_idxGenReg - 8) * TARGET_POINTER_SIZE; // We used up the remaining reg slots. m_idxGenReg = 8; @@ -1489,8 +1488,8 @@ int ArgIteratorTemplate::GetNextOffset() } } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_idxStack * 8; - m_idxStack += cArgSlots; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; + m_curOfs += cbArg; return argOfs; #else PORTABILITY_ASSERT("ArgIteratorTemplate::GetNextOffset"); From 1ff7b76d80f38043a5e85556e0faf2904f7f4909 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 6 Jan 2021 18:22:33 -0800 Subject: [PATCH 06/25] Use byte sizes and offsets in `ArgLocDesc`. Fix arm32. x86 fixes. use StackSize on ArgSizes Add `GetStackArgumentByteIndexFromOffset` and return back the old values for asserts. 
another fix --- src/coreclr/vm/callingconvention.h | 86 ++++++++++++++++++++---------- src/coreclr/vm/comdelegate.cpp | 17 ++++-- 2 files changed, 71 insertions(+), 32 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 1785f2a3e05ef..8673147f78bbc 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -42,6 +42,9 @@ struct ArgLocDesc int m_idxStack; // First stack slot used (or -1) int m_cStack; // Count of stack slots used (or 0) + int m_byteStackIndex; // Stack offset in bytes (or -1) + int m_byteStackSize; // Stack size in bytes + #if defined(UNIX_AMD64_ABI) EEClass* m_eeClass; // For structs passed in register, it points to the EEClass of the struct @@ -87,6 +90,8 @@ struct ArgLocDesc m_cGenReg = 0; m_idxStack = -1; m_cStack = 0; + m_byteStackIndex = -1; + m_byteStackSize = 0; #if defined(TARGET_ARM) m_fRequires64BitAlignment = FALSE; #endif @@ -227,7 +232,14 @@ struct TransitionBlock { LIMITED_METHOD_CONTRACT; - return (offset - TransitionBlock::GetOffsetOfArgs()) / STACK_ELEM_SIZE; + return (offset - TransitionBlock::GetOffsetOfArgs()) / TARGET_POINTER_SIZE; + } + + static UINT GetStackArgumentByteIndexFromOffset(int offset) + { + LIMITED_METHOD_CONTRACT; + + return (offset - TransitionBlock::GetOffsetOfArgs()); } #ifdef CALLDESCR_FPARGREGS @@ -599,16 +611,21 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); int cSlots = (GetArgSize() + 3) / 4; + const unsigned byteArgSize = StackElemSize(GetArgSize()); if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); _ASSERTE(cSlots == 1); - pLoc->m_cGenReg = cSlots; + _ASSERTE(byteArgSize <= TARGET_POINTER_SIZE); + pLoc->m_cGenReg = 1; } else { pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_cStack = cSlots; + pLoc->m_byteStackSize = GetArgSize(); + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif @@ -623,12 +640,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->m_fRequires64BitAlignment = m_fRequires64BitAlignment; + const unsigned byteArgSize = StackElemSize(GetArgSize()); int cSlots = (GetArgSize() + 3) / 4; - if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 4; - pLoc->m_cFloatReg = cSlots; + pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / TARGET_POINTER_SIZE; + _ASSERTE(byteArgSize / TARGET_POINTER_SIZE == cSlots); + pLoc->m_cFloatReg = byteArgSize / TARGET_POINTER_SIZE; return; } @@ -636,22 +654,27 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - if (cSlots <= (4 - pLoc->m_idxGenReg)) + if ((int)byteArgSize <= (4 - pLoc->m_idxGenReg) * TARGET_POINTER_SIZE) { - pLoc->m_cGenReg = cSlots; + pLoc->m_cGenReg = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; } else { pLoc->m_cGenReg = 4 - pLoc->m_idxGenReg; - - pLoc->m_idxStack = 0; pLoc->m_cStack = cSlots - pLoc->m_cGenReg; + pLoc->m_idxStack = 0; + pLoc->m_byteStackIndex = 0; + pLoc->m_byteStackSize = byteArgSize - pLoc->m_cGenReg * TARGET_POINTER_SIZE; } } else { pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_cStack = cSlots; + + 
pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + pLoc->m_byteStackSize = byteArgSize; + _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_ARM @@ -664,6 +687,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); + const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); + unsigned byteArgSize = StackElemSize(GetArgSize(), isValueType); + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes. @@ -673,7 +699,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { CorInfoHFAElemType type = m_argTypeHandle.GetHFAType(); pLoc->setHFAFieldSize(type); - pLoc->m_cFloatReg = GetArgSize()/pLoc->m_hfaFieldSize; + pLoc->m_cFloatReg = byteArgSize / pLoc->m_hfaFieldSize; } else @@ -683,17 +709,17 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE return; } - const unsigned argSizeInBytes = GetArgSize(); - unsigned cSlots; + + unsigned cSlots = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + + // Question: why do not arm and x86 have similar checks? // Composites greater than 16 bytes are passed by reference - if ((GetArgType() == ELEMENT_TYPE_VALUETYPE) && (argSizeInBytes > ENREGISTERED_PARAMTYPE_MAXSIZE)) + if ((GetArgType() == ELEMENT_TYPE_VALUETYPE) && (byteArgSize > ENREGISTERED_PARAMTYPE_MAXSIZE)) { + byteArgSize = TARGET_POINTER_SIZE; cSlots = 1; } - else - { - cSlots = (argSizeInBytes + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; - } + // Sanity check to make sure no caller is trying to get an ArgLocDesc that // describes the return buffer reg field that's in the TransitionBlock. @@ -702,12 +728,15 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - pLoc->m_cGenReg = cSlots; + pLoc->m_cGenReg = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;; } else { pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_cStack = cSlots; + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + pLoc->m_byteStackSize = byteArgSize; + _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_ARM64 @@ -764,12 +793,15 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE else { pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - int argOnStackSize; + pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); + int argSizeInBytes; if (IsArgPassedByRef()) - argOnStackSize = TARGET_POINTER_SIZE; + argSizeInBytes = TARGET_POINTER_SIZE; else - argOnStackSize = GetArgSize(); - pLoc->m_cStack = (argOnStackSize + STACK_ELEM_SIZE - 1) / STACK_ELEM_SIZE; + argSizeInBytes = GetArgSize(); + pLoc->m_cStack = (argSizeInBytes + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + pLoc->m_byteStackSize = argSizeInBytes; + _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_AMD64 @@ -789,16 +821,15 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE bool m_hasArgLocDescForStructInRegs; #endif // (TARGET_AMD64 && UNIX_AMD64_ABI) || TARGET_ARM64 + int m_curOfs; // Current position of the stack iterator, in bytes + #ifdef TARGET_X86 - int m_curOfs; // Current position of the stack iterator int m_numRegistersUsed; #ifdef FEATURE_INTERPRETER bool 
m_fUnmanagedCallConv; #endif #endif - int m_curOfs; // Current position of the stack iterator, in bytes - #ifdef UNIX_AMD64_ABI int m_idxGenReg; // Next general register to be assigned a value int m_idxFPReg; // Next floating point register to be assigned a value @@ -1196,7 +1227,6 @@ int ArgIteratorTemplate::GetNextOffset() int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; - int cArgSlots = cbArg / TARGET_POINTER_SIZE; m_curOfs += cbArg; return argOfs; @@ -1342,10 +1372,10 @@ int ArgIteratorTemplate::GetNextOffset() int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 4; int cRemainingRegs = 4 - m_idxGenReg; - if (cArgSlots <= cRemainingRegs) + if (cbArg <= cRemainingRegs * TARGET_POINTER_SIZE) { // Mark the registers just allocated as used. - m_idxGenReg += cArgSlots; + m_idxGenReg += (cbArg + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; return argOfs; } diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index e0c7f179fd60a..b778bfc0e442d 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -75,6 +75,8 @@ class ShuffleIterator int m_currentGenRegIndex; // Current floating point register index (relative to the ArgLocDesc::m_idxFloatReg) int m_currentFloatRegIndex; + // Current byte stack index (relative to the ArgLocDesc::m_byteStackIndex) + int m_currentByteStackIndex; // Current stack slot index (relative to the ArgLocDesc::m_idxStack) int m_currentStackSlotIndex; @@ -134,6 +136,7 @@ class ShuffleIterator #endif m_currentGenRegIndex(0), m_currentFloatRegIndex(0), + m_currentByteStackIndex(0), m_currentStackSlotIndex(0) { } @@ -141,9 +144,10 @@ class ShuffleIterator // Check if there are more offsets to shuffle bool HasNextOfs() { + _ASSERTE((m_currentStackSlotIndex < m_argLocDesc->m_cStack) == (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize)); return (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) || (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) || - (m_currentStackSlotIndex < m_argLocDesc->m_cStack); + (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize); } // Get next offset to shuffle. There has to be at least one offset left. @@ -182,10 +186,15 @@ class ShuffleIterator // If we get here we must have at least one stack slot left to shuffle (this method should only be called // when AnythingToShuffle(pArg) == true). - if (m_currentStackSlotIndex < m_argLocDesc->m_cStack) - { - index = m_argLocDesc->m_idxStack + m_currentStackSlotIndex; + _ASSERTE((m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) == (m_currentStackSlotIndex < m_argLocDesc->m_cStack)); + if (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) + { + const unsigned byteIndex = m_argLocDesc->m_byteStackIndex + m_currentByteStackIndex; + _ASSERTE((byteIndex % TARGET_POINTER_SIZE) == 0); + index = byteIndex / TARGET_POINTER_SIZE; + _ASSERTE((m_argLocDesc->m_idxStack + m_currentStackSlotIndex) == index); m_currentStackSlotIndex++; + m_currentByteStackIndex += m_argLocDesc->m_byteStackSize; // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. 
if (index >= ShuffleEntry::REGMASK) From d4237b71fb268fd9022583c1c968c207579264b1 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 6 Jan 2021 20:12:56 -0800 Subject: [PATCH 07/25] Stop using `#define STACK_ELEM_SIZE` --- src/coreclr/vm/amd64/cgencpu.h | 9 ++++----- src/coreclr/vm/arm/cgencpu.h | 9 ++++----- src/coreclr/vm/arm64/cgencpu.h | 11 +++++------ src/coreclr/vm/callingconvention.h | 4 ++-- src/coreclr/vm/comdelegate.cpp | 2 +- src/coreclr/vm/i386/cgencpu.h | 9 ++++----- 6 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index d47003974f60b..457fd4c9e3578 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -145,14 +145,13 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -typedef INT64 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) { + typedef INT64 StackElemType; + const unsigned stackSlotSize = sizeof(StackElemType); // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); - return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); + static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } //********************************************************************** diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index f121746c9eb5a..8969f7c8acaec 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -99,14 +99,13 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); // Parameter size //********************************************************************** -typedef INT32 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) { + typedef INT32 StackElemType; + const unsigned stackSlotSize = sizeof(StackElemType); // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); - return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); + static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } //********************************************************************** diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 866e6390b749e..3ef6a7b4bbf71 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -81,14 +81,13 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -typedef INT64 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - inline unsigned StackElemSize(unsigned parmSize, bool isValueType) { - // The expression below assumes STACK_ELEM_SIZE is a power of 2, so check that. 
- static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); - return ((parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); + typedef INT64 StackElemType; + const unsigned stackSlotSize = sizeof(StackElemType); + // The next expression assumes STACK_ELEM_SIZE is a power of 2. + static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } // diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 8673147f78bbc..293b9c5ac57c8 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -1764,14 +1764,14 @@ void ArgIteratorTemplate::ForceSigWalk() #else // UNIX_AMD64_ABI // All stack arguments take just one stack slot on AMD64 because of arguments bigger // than a stack slot are passed by reference. - stackElemSize = STACK_ELEM_SIZE; + stackElemSize = TARGET_POINTER_SIZE; #endif // UNIX_AMD64_ABI #else // TARGET_AMD64 const bool isValueType = (GetArgType() == ELEMENT_TYPE_VALUETYPE); stackElemSize = StackElemSize(GetArgSize(), isValueType); #if defined(ENREGISTERED_PARAMTYPE_MAXSIZE) if (IsArgPassedByRef()) - stackElemSize = STACK_ELEM_SIZE; + stackElemSize = TARGET_POINTER_SIZE; #endif #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index b778bfc0e442d..15c968a66c656 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -194,7 +194,7 @@ class ShuffleIterator index = byteIndex / TARGET_POINTER_SIZE; _ASSERTE((m_argLocDesc->m_idxStack + m_currentStackSlotIndex) == index); m_currentStackSlotIndex++; - m_currentByteStackIndex += m_argLocDesc->m_byteStackSize; + m_currentByteStackIndex += TARGET_POINTER_SIZE; // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. if (index >= ShuffleEntry::REGMASK) diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 054059a263a2a..7fb0eedd9d32d 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -102,14 +102,13 @@ EXTERN_C void SinglecastDelegateInvokeStub(); // Parameter size //********************************************************************** -typedef INT32 StackElemType; -#define STACK_ELEM_SIZE sizeof(StackElemType) - inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */) { + typedef INT32 StackElemType; + const unsigned stackSlotSize = sizeof(StackElemType); // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); - return (parmSize + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1); + static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } #include "stublinkerx86.h" From e130b18f41c5526fb95a8000da5025fc8b8b241d Mon Sep 17 00:00:00 2001 From: Sergey Date: Sun, 10 Jan 2021 19:57:58 -0800 Subject: [PATCH 08/25] Add `isFloatHfa`. 
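The new flag identifies value types that are HFAs made up entirely of 32-bit floats. That is the case the Apple arm64 ABI keeps at its natural 4-byte alignment on the stack instead of padding every argument to an 8-byte slot. The hunks in this patch only thread the flag through the existing signatures (most implementations still mark it /* unused */); the sketch below is an assumption about how an OSX_ARM64_ABI-specific StackElemSize could consume it later in the series, not code taken from this excerpt.

    // Hypothetical Apple-arm64-aware variant (assumption, not part of this patch):
    inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa)
    {
    #if defined(OSX_ARM64_ABI)
        if (!isValueType)
            return parmSize;   // primitives keep their natural size and alignment
        if (isFloatHfa)
            return parmSize;   // all-float HFAs stay packed with 4-byte alignment
    #endif
        const unsigned stackSlotSize = 8;
        return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1);
    }

TypeHandle::IsFloatHfa(), added at the end of this patch, gives call sites a uniform way to compute the flag; sites where it cannot apply pass an explicit false constant so the intent stays visible.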
--- .../classlibnative/bcltype/varargsnative.cpp | 15 ++++++++++----- src/coreclr/vm/amd64/cgencpu.h | 2 +- src/coreclr/vm/arm/cgencpu.h | 2 +- src/coreclr/vm/arm64/cgencpu.h | 2 +- src/coreclr/vm/callingconvention.h | 12 ++++++++---- src/coreclr/vm/clrvarargs.cpp | 3 ++- src/coreclr/vm/dllimport.cpp | 5 +++-- src/coreclr/vm/i386/cgencpu.h | 2 +- src/coreclr/vm/mlinfo.cpp | 7 +++++-- src/coreclr/vm/typehandle.cpp | 10 ++++++++++ src/coreclr/vm/typehandle.h | 2 ++ 11 files changed, 44 insertions(+), 18 deletions(-) diff --git a/src/coreclr/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/classlibnative/bcltype/varargsnative.cpp index 5a1c755cde9ac..a53c0b09ba55a 100644 --- a/src/coreclr/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/classlibnative/bcltype/varargsnative.cpp @@ -115,7 +115,8 @@ static void InitCommon(VARARGS *data, VASigCookie** cookie) // Always skip over the varargs_cookie. const bool isValueType = false; - data->ArgPtr += StackElemSize(sizeof(LPVOID), isValueType); + const bool isFloatHfa = false; + data->ArgPtr += StackElemSize(sizeof(LPVOID), isValueType, isFloatHfa); #endif } @@ -142,7 +143,8 @@ void AdvanceArgPtr(VARARGS *data) TypeHandle thValueType; const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType); const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); - unsigned cbArg = StackElemSize(cbRaw, isValueType); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -268,7 +270,8 @@ VarArgsNative::Init2, TypeHandle thValueType; unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); - unsigned cbArg = StackElemSize(cbRaw, isValueType); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (ArgIterator::IsVarArgPassedByRef(cbRaw)) cbArg = sizeof(void*); @@ -425,7 +428,8 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC { COMPlusThrow(kNotSupportedException, W("NotSupported_Type")); } - size = StackElemSize(size, isValueType); + const bool isFloatHfa = false; + size = StackElemSize(size, isValueType, isFloatHfa); AdjustArgPtrForAlignment(_this, size); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE @@ -478,7 +482,8 @@ VarArgsNative::GetNextArgHelper( TypeHandle thValueType; const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType); const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType()); - unsigned cbArg = StackElemSize(cbRaw, isValueType); + const bool isFloatHfa = false; + unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa); AdjustArgPtrForAlignment(data, cbArg); // Get a pointer to the beginning of the argument. 
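Another reviewer note rather than patch content: the three varargs helpers above all derive isValueType from the TypeHandle that SizeOf now fills in, repeating the same two checks each time. A small helper along these lines (hypothetical name, not introduced by the patch) would capture the idiom in one place:

    // Mirrors the repeated pattern in AdvanceArgPtr, Init2 and GetNextArgHelper:
    // the handle matters only when it is non-null and actually a value type.
    static bool IsValueTypeArg(TypeHandle thValueType)
    {
        return !thValueType.IsNull() && thValueType.IsValueType();
    }
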
diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 457fd4c9e3578..771e135a1a6a8 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -145,7 +145,7 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { typedef INT64 StackElemType; const unsigned stackSlotSize = sizeof(StackElemType); diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 8969f7c8acaec..d8d01733de9a8 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -99,7 +99,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); // Parameter size //********************************************************************** -inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { typedef INT32 StackElemType; const unsigned stackSlotSize = sizeof(StackElemType); diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 3ef6a7b4bbf71..9bc10c5a385fc 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -81,7 +81,7 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) // Parameter size //********************************************************************** -inline unsigned StackElemSize(unsigned parmSize, bool isValueType) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa) { typedef INT64 StackElemType; const unsigned stackSlotSize = sizeof(StackElemType); diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 293b9c5ac57c8..7966b04df05f0 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -688,7 +688,8 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); - unsigned byteArgSize = StackElemSize(GetArgSize(), isValueType); + unsigned byteArgSize = StackElemSize(GetArgSize(), isValueType, m_argTypeHandle.IsFloatHfa()); + if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { @@ -1460,7 +1461,7 @@ int ArgIteratorTemplate::GetNextOffset() break; } const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); - const int cbArg = StackElemSize(argSize, isValueType); + const int cbArg = StackElemSize(argSize, isValueType, thValueType.IsFloatHfa()); if (cFPRegs>0 && !this->IsVarArg()) { if (cFPRegs + m_idxFPReg <= 8) @@ -1767,8 +1768,11 @@ void ArgIteratorTemplate::ForceSigWalk() stackElemSize = TARGET_POINTER_SIZE; #endif // UNIX_AMD64_ABI #else // TARGET_AMD64 - const bool isValueType = (GetArgType() == ELEMENT_TYPE_VALUETYPE); - stackElemSize = StackElemSize(GetArgSize(), isValueType); + + TypeHandle thValueType; + const CorElementType argType = GetArgType(&thValueType); + const bool isValueType = (argType == ELEMENT_TYPE_VALUETYPE); + stackElemSize = StackElemSize(GetArgSize(), isValueType, thValueType.IsFloatHfa()); #if defined(ENREGISTERED_PARAMTYPE_MAXSIZE) if (IsArgPassedByRef()) stackElemSize = TARGET_POINTER_SIZE; diff --git a/src/coreclr/vm/clrvarargs.cpp b/src/coreclr/vm/clrvarargs.cpp index 33bec19e93e4e..37c30739aedf5 
100644 --- a/src/coreclr/vm/clrvarargs.cpp +++ b/src/coreclr/vm/clrvarargs.cpp @@ -77,7 +77,8 @@ VARARGS::MarshalToUnmanagedVaList( case ELEMENT_TYPE_PTR: { const bool isValueType = false; - DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType), isValueType); + const bool isFloatHfa = false; + DWORD cbSize = StackElemSize(CorTypeInfo::Size(elemType), isValueType, isFloatHfa); #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (cbSize > ENREGISTERED_PARAMTYPE_MAXSIZE) diff --git a/src/coreclr/vm/dllimport.cpp b/src/coreclr/vm/dllimport.cpp index ff6b4982333ca..ea3ec6fbebba8 100644 --- a/src/coreclr/vm/dllimport.cpp +++ b/src/coreclr/vm/dllimport.cpp @@ -3231,7 +3231,7 @@ BOOL NDirect::MarshalingRequired( if (i > 0) { const bool isValueType = true; - dwStackSize += StackElemSize(hndArgType.GetSize(), isValueType); + dwStackSize += StackElemSize(hndArgType.GetSize(), isValueType, hndArgType.IsFloatHfa()); } break; } @@ -3252,7 +3252,8 @@ BOOL NDirect::MarshalingRequired( if (i > 0) { const bool isValueType = false; - dwStackSize += StackElemSize(CorTypeInfo::Size(type), isValueType); + const bool isFloatHfa = false; + dwStackSize += StackElemSize(CorTypeInfo::Size(type), isValueType, isFloatHfa); } } else diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 7fb0eedd9d32d..5da84103a2d8a 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -102,7 +102,7 @@ EXTERN_C void SinglecastDelegateInvokeStub(); // Parameter size //********************************************************************** -inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */) +inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { typedef INT32 StackElemType; const unsigned stackSlotSize = sizeof(StackElemType); diff --git a/src/coreclr/vm/mlinfo.cpp b/src/coreclr/vm/mlinfo.cpp index acd3f170b68c0..71788db9efd5d 100644 --- a/src/coreclr/vm/mlinfo.cpp +++ b/src/coreclr/vm/mlinfo.cpp @@ -2875,7 +2875,8 @@ void MarshalInfo::SetupArgumentSizes() const unsigned targetPointerSize = TARGET_POINTER_SIZE; const bool pointerIsValueType = false; - _ASSERTE(targetPointerSize == StackElemSize(TARGET_POINTER_SIZE, pointerIsValueType)); + const bool pointerIsFloatHfa = false; + _ASSERTE(targetPointerSize == StackElemSize(TARGET_POINTER_SIZE, pointerIsValueType, pointerIsFloatHfa)); if (m_byref) { @@ -2883,7 +2884,9 @@ void MarshalInfo::SetupArgumentSizes() } else { - m_nativeArgSize = StackElemSize(GetNativeSize(m_type), IsValueClass(m_type)); + const bool isValueType = IsValueClass(m_type); + const bool isFloatHfa = isValueType && (m_pMT->GetHFAType() == CORINFO_HFA_ELEM_FLOAT); + m_nativeArgSize = StackElemSize(GetNativeSize(m_type), isValueType, isFloatHfa); } #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE diff --git a/src/coreclr/vm/typehandle.cpp b/src/coreclr/vm/typehandle.cpp index b27feaec2b0f3..b198c87467c92 100644 --- a/src/coreclr/vm/typehandle.cpp +++ b/src/coreclr/vm/typehandle.cpp @@ -434,6 +434,16 @@ CorInfoHFAElemType TypeHandle::GetHFAType() const return CORINFO_HFA_ELEM_NONE; } +bool TypeHandle::IsFloatHfa() const +{ + WRAPPER_NO_CONTRACT; + if (IsNull() || !IsHFA()) + { + return false; + } + return (GetHFAType() == CORINFO_HFA_ELEM_FLOAT); +} + #ifdef FEATURE_64BIT_ALIGNMENT bool TypeHandle::RequiresAlign8() const diff --git a/src/coreclr/vm/typehandle.h b/src/coreclr/vm/typehandle.h index 387a75b362138..e623d4a8cec84 100644 --- a/src/coreclr/vm/typehandle.h +++ 
b/src/coreclr/vm/typehandle.h @@ -373,6 +373,8 @@ class TypeHandle bool IsHFA() const; CorInfoHFAElemType GetHFAType() const; + bool IsFloatHfa() const; + #ifdef FEATURE_64BIT_ALIGNMENT bool RequiresAlign8() const; #endif // FEATURE_64BIT_ALIGNMENT From 28fc554be188f68b931af36f1a13e6ef94287f14 Mon Sep 17 00:00:00 2001 From: Sergey Date: Sun, 10 Jan 2021 20:09:28 -0800 Subject: [PATCH 09/25] delete checking code. because it won't pass on arm64 apple. --- src/coreclr/vm/callingconvention.h | 20 -------------------- src/coreclr/vm/comdelegate.cpp | 9 +-------- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 7966b04df05f0..d10bcbbe47538 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -39,9 +39,6 @@ struct ArgLocDesc int m_idxGenReg; // First general register used (or -1) int m_cGenReg; // Count of general registers used (or 0) - int m_idxStack; // First stack slot used (or -1) - int m_cStack; // Count of stack slots used (or 0) - int m_byteStackIndex; // Stack offset in bytes (or -1) int m_byteStackSize; // Stack size in bytes @@ -88,8 +85,6 @@ struct ArgLocDesc m_cFloatReg = 0; m_idxGenReg = -1; m_cGenReg = 0; - m_idxStack = -1; - m_cStack = 0; m_byteStackIndex = -1; m_byteStackSize = 0; #if defined(TARGET_ARM) @@ -621,11 +616,8 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; pLoc->m_byteStackSize = GetArgSize(); pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); - _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif @@ -661,20 +653,14 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE else { pLoc->m_cGenReg = 4 - pLoc->m_idxGenReg; - pLoc->m_cStack = cSlots - pLoc->m_cGenReg; - pLoc->m_idxStack = 0; pLoc->m_byteStackIndex = 0; pLoc->m_byteStackSize = byteArgSize - pLoc->m_cGenReg * TARGET_POINTER_SIZE; } } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; - pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); pLoc->m_byteStackSize = byteArgSize; - _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_ARM @@ -733,11 +719,8 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); - pLoc->m_cStack = cSlots; pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); pLoc->m_byteStackSize = byteArgSize; - _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_ARM64 @@ -793,16 +776,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE } else { - pLoc->m_idxStack = TransitionBlock::GetStackArgumentIndexFromOffset(argOffset); pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); int argSizeInBytes; if (IsArgPassedByRef()) argSizeInBytes = TARGET_POINTER_SIZE; else argSizeInBytes = GetArgSize(); - pLoc->m_cStack = (argSizeInBytes + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; pLoc->m_byteStackSize = argSizeInBytes; - _ASSERTE(pLoc->m_byteStackIndex / TARGET_POINTER_SIZE == pLoc->m_idxStack); } } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 
15c968a66c656..237960b023890 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -77,8 +77,6 @@ class ShuffleIterator int m_currentFloatRegIndex; // Current byte stack index (relative to the ArgLocDesc::m_byteStackIndex) int m_currentByteStackIndex; - // Current stack slot index (relative to the ArgLocDesc::m_idxStack) - int m_currentStackSlotIndex; #if defined(UNIX_AMD64_ABI) // Get next shuffle offset for struct passed in registers. There has to be at least one offset left. @@ -136,15 +134,13 @@ class ShuffleIterator #endif m_currentGenRegIndex(0), m_currentFloatRegIndex(0), - m_currentByteStackIndex(0), - m_currentStackSlotIndex(0) + m_currentByteStackIndex(0) { } // Check if there are more offsets to shuffle bool HasNextOfs() { - _ASSERTE((m_currentStackSlotIndex < m_argLocDesc->m_cStack) == (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize)); return (m_currentGenRegIndex < m_argLocDesc->m_cGenReg) || (m_currentFloatRegIndex < m_argLocDesc->m_cFloatReg) || (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize); @@ -186,14 +182,11 @@ class ShuffleIterator // If we get here we must have at least one stack slot left to shuffle (this method should only be called // when AnythingToShuffle(pArg) == true). - _ASSERTE((m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) == (m_currentStackSlotIndex < m_argLocDesc->m_cStack)); if (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) { const unsigned byteIndex = m_argLocDesc->m_byteStackIndex + m_currentByteStackIndex; _ASSERTE((byteIndex % TARGET_POINTER_SIZE) == 0); index = byteIndex / TARGET_POINTER_SIZE; - _ASSERTE((m_argLocDesc->m_idxStack + m_currentStackSlotIndex) == index); - m_currentStackSlotIndex++; m_currentByteStackIndex += TARGET_POINTER_SIZE; // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. From 8364408c4585cd2d492eacd2746bf65a93304e3d Mon Sep 17 00:00:00 2001 From: Sergey Date: Sun, 10 Jan 2021 20:12:42 -0800 Subject: [PATCH 10/25] arm64 apple fixes. --- src/coreclr/vm/arm64/cgencpu.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 9bc10c5a385fc..a5bd162b7c906 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -83,6 +83,19 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa) { +#if defined(OSX_ARM64_ABI) + if (!isValueType) + { + // No padding/alignment for primitive types. + return parmSize; + } + if (isFloatHfa) + { + // float hfa is not considered a struct type and passed with 4-byte alignment. + return sizeof(float); + } +#endif + typedef INT64 StackElemType; const unsigned stackSlotSize = sizeof(StackElemType); // The next expression assumes STACK_ELEM_SIZE is a power of 2. From 18340b4c37211c252ee6d5c999b2e7e87769c23f Mon Sep 17 00:00:00 2001 From: Sergey Date: Sun, 10 Jan 2021 20:26:35 -0800 Subject: [PATCH 11/25] roundUp the stack size. --- src/coreclr/vm/callingconvention.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index d10bcbbe47538..e2f28e1a064d6 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -1789,6 +1789,9 @@ void ArgIteratorTemplate::ForceSigWalk() #endif // TARGET_X86 + // arg stack size is rounded to the pointer size on all platforms. 
+ nSizeOfArgStack = (int)ALIGN_UP(nSizeOfArgStack, TARGET_POINTER_SIZE); + // Cache the result m_nSizeOfArgStack = nSizeOfArgStack; m_dwFlags |= SIZE_OF_ARG_STACK_COMPUTED; From bb99453efd50bd4b0bdfadbe3233e3bb0c7f1883 Mon Sep 17 00:00:00 2001 From: Sergey Date: Mon, 28 Dec 2020 23:48:25 -0800 Subject: [PATCH 12/25] Add a reflection test. --- .../regression/manyStackArgs/manyStackArgs.cs | 64 +++++++++++++++++++ .../manyStackArgs/manyStackArgs.csproj | 13 ++++ 2 files changed, 77 insertions(+) create mode 100644 src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs create mode 100644 src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj diff --git a/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs new file mode 100644 index 0000000000000..11171e15c4088 --- /dev/null +++ b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.cs @@ -0,0 +1,64 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Reflection; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +public class TestSmallStackArgsClass +{ + public TestSmallStackArgsClass() + { } + + [MethodImpl(MethodImplOptions.NoInlining)] + public int TestSmallStackArgsMethod(short s1, short s2, short s3, short s4, short s5, short s6, short s7, short s8, short s9, short s10, short s11, short s12) + { + if (s1 != 1 || s2 != 2 || s3 != 3 || s4 != 4 || s5 != 5 || s6 != 6 || s7 != 7 || s8 != 8 || s9 != 9 || s10 != 10 || s11 != 11 || s12 != 12) + { + Console.WriteLine("small stack arguments do not match."); + Console.WriteLine("Expected: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12"); + Console.WriteLine("Actual: " + s1 + ", " + s2 + ", " + s3 + ", " + s4 + ", " + s5 + ", " + s6 + ", " + s7 + ", " + s8 + ", " + s9 + ", " + s10 + ", " + s11 + ", " + s12); + return 101; + } + return 100; + } +} + +public class TestMethodInfo +{ + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int InvokeReflection(object testClassObject, MethodInfo testMethod, object[] args) + { + + object retValue = testMethod.Invoke(testClassObject, args); + int r = Convert.ToInt32(retValue); + return r; + } + + + public static int Main() + { + Type testClass = Type.GetType("TestSmallStackArgsClass"); + ConstructorInfo testConstructor = testClass.GetConstructor(Type.EmptyTypes); + object testClassObject = testConstructor.Invoke(new object[] { }); + + MethodInfo testMethod = testClass.GetMethod("TestSmallStackArgsMethod"); + var args = new object[12]; + for (short i = 0; i < 12; ++i) + { + args[i] = (short)(i + 1); + } + + int r = InvokeReflection(testClassObject, testMethod, args); + + if (r != 100) + { + Console.WriteLine("The test failed"); + return 101; + } + Console.WriteLine("The test passed"); + return 100; + } +} diff --git a/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj new file mode 100644 index 0000000000000..adc3b7203ae20 --- /dev/null +++ b/src/tests/reflection/regression/manyStackArgs/manyStackArgs.csproj @@ -0,0 +1,13 @@ + + + Exe + 0 + + + None + True + + + + + From 7e00748ca1e5b38a3e9b2b0b69c23c23856b79e0 Mon Sep 17 00:00:00 2001 From: Sergey Date: Mon, 11 Jan 2021 14:41:18 -0800 Subject: [PATCH 13/25] Return byte offset from `GetNextOfs`. It is not a complete fix for arm64 apple, but covers most cases. 
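Register entries keep the ShuffleEntry::REGMASK / ShuffleEntry::FPREGMASK bits set, while stack entries come back as a negated byte offset, so the two kinds of locations can be told apart by sign. A standalone sketch of that convention (the mask values and names below are placeholders, not the real ShuffleEntry constants):

    // Illustrative only; mask values and names are assumptions, not CoreCLR's.
    #include <cassert>

    enum SketchMask { kRegMask = 0x8000, kFpRegMask = 0x4000 };

    inline int EncodeRegister(int regNum, bool isFloatReg)
    {
        return regNum | kRegMask | (isFloatReg ? kFpRegMask : 0);
    }

    inline int EncodeStackByteOffset(unsigned byteOffset)
    {
        return -(int)byteOffset;   // negative sign marks a stack location
    }

    int main()
    {
        int r = EncodeRegister(3, /* isFloatReg */ true);
        assert((r & kRegMask) != 0 && (r & kFpRegMask) != 0);
        assert(EncodeStackByteOffset(24) == -24);   // 24 bytes into the stack area
        assert(EncodeStackByteOffset(24) < 0 && r > 0);
        return 0;
    }
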
--- src/coreclr/vm/comdelegate.cpp | 48 +++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index 237960b023890..35baf178c3924 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -147,7 +147,9 @@ class ShuffleIterator } // Get next offset to shuffle. There has to be at least one offset left. - UINT16 GetNextOfs() + // For register arguments it returns regNum | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK. + // For stack arguments it returns stack offset in bytes with negative sign. + int GetNextOfs() { int index; @@ -157,7 +159,9 @@ class ShuffleIterator EEClass* eeClass = m_argLocDesc->m_eeClass; if (m_argLocDesc->m_eeClass != 0) { - return GetNextOfsInStruct(); + index = GetNextOfsInStruct(); + _ASSERT((index & ShuffleEntry::REGMASK) != 0); + return index; } #endif // UNIX_AMD64_ABI @@ -167,7 +171,7 @@ class ShuffleIterator index = m_argLocDesc->m_idxFloatReg + m_currentFloatRegIndex; m_currentFloatRegIndex++; - return (UINT16)index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK; + return index | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK; } // Shuffle any registers first (the order matters since otherwise we could end up shuffling a stack slot @@ -177,7 +181,7 @@ class ShuffleIterator index = m_argLocDesc->m_idxGenReg + m_currentGenRegIndex; m_currentGenRegIndex++; - return (UINT16)index | ShuffleEntry::REGMASK; + return index | ShuffleEntry::REGMASK; } // If we get here we must have at least one stack slot left to shuffle (this method should only be called @@ -185,7 +189,6 @@ class ShuffleIterator if (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) { const unsigned byteIndex = m_argLocDesc->m_byteStackIndex + m_currentByteStackIndex; - _ASSERTE((byteIndex % TARGET_POINTER_SIZE) == 0); index = byteIndex / TARGET_POINTER_SIZE; m_currentByteStackIndex += TARGET_POINTER_SIZE; @@ -195,7 +198,7 @@ class ShuffleIterator COMPlusThrow(kNotSupportedException); } - return (UINT16)index; + return -(int)byteIndex; } // There are no more offsets to get, the caller should not have called us @@ -264,13 +267,39 @@ BOOL AddNextShuffleEntryToArray(ArgLocDesc sArgSrc, ArgLocDesc sArgDst, SArrayAppend(entry); } } From 2026ab79c34b67e22618baf46df51950007431c6 Mon Sep 17 00:00:00 2001 From: Sergey Date: Mon, 11 Jan 2021 14:41:18 -0800 Subject: [PATCH 14/25] Add `FLOAT_REGISTER_SIZE` --- src/coreclr/vm/amd64/cgencpu.h | 2 ++ src/coreclr/vm/arm/cgencpu.h | 2 ++ src/coreclr/vm/arm64/cgencpu.h | 3 +++ src/coreclr/vm/callingconvention.h | 12 +++++------- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 771e135a1a6a8..0bb9c506df79d 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -101,6 +101,8 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr); #define X86RegFromAMD64Reg(extended_reg) \ ((X86Reg)(((int)extended_reg) & X86_REGISTER_MASK)) +#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes. + // Why is the return value ARG_SLOT? On 64-bit systems, that is 64-bits // and much bigger than necessary for R4, requiring explicit downcasts. 
inline diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index d8d01733de9a8..e063c8f683610 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -95,6 +95,8 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); // Offset of pc register #define PC_REG_RELATIVE_OFFSET 4 +#define FLOAT_REGISTER_SIZE 4 // each register in FloatArgumentRegisters is 4 bytes. + //********************************************************************** // Parameter size //********************************************************************** diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index a5bd162b7c906..5793fc13a8420 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -58,6 +58,8 @@ extern PCODE GetPreStubEntryPoint(); #define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter #define CALLDESCR_RETBUFFARGREG 1 // CallDescrWorker has RetBuffArg parameter that's separate from arg regs +#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes. + // Given a return address retrieved during stackwalk, // this is the offset by which it should be decremented to arrive at the callsite. #define STACKWALK_CONTROLPC_ADJUST_OFFSET 4 @@ -87,6 +89,7 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH if (!isValueType) { // No padding/alignment for primitive types. + _ASSERTE((((parmSize & (parmSize - 1)) == 0)); return parmSize; } if (isFloatHfa) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index e2f28e1a064d6..27828d4bd841a 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -636,9 +636,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE int cSlots = (GetArgSize() + 3) / 4; if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / TARGET_POINTER_SIZE; - _ASSERTE(byteArgSize / TARGET_POINTER_SIZE == cSlots); - pLoc->m_cFloatReg = byteArgSize / TARGET_POINTER_SIZE; + pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; + _ASSERTE(byteArgSize / FLOAT_REGISTER_SIZE == cSlots); + pLoc->m_cFloatReg = byteArgSize / FLOAT_REGISTER_SIZE; return; } @@ -679,8 +679,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes. - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16; + pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()) { @@ -752,8 +751,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #if defined(UNIX_AMD64_ABI) if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - // Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes. 
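The other half of this change is that the FP register count is now computed from the raw GetArgSize() rather than the rounded size: for an HFA the register count is the struct size divided by the HFA element size, and rounding first could over-count (e.g. a 12-byte HFA of three floats rounded to 16 bytes would look like four registers). Only the stack copy of the argument is slot-rounded with StackElemSize. An illustrative standalone example (plain C++ types, not ArgLocDesc):

    // Illustrative only: how an HFA's register count falls out of its raw size.
    #include <cassert>
    #include <cstddef>

    struct ThreeFloats { float a, b, c; };   // a 12-byte HFA of three floats

    int main()
    {
        const unsigned hfaFieldSize = sizeof(float);                      // 4 bytes per element
        const unsigned cFloatReg    = sizeof(ThreeFloats) / hfaFieldSize;
        assert(cFloatReg == 3);                                           // one FP register per field

        const size_t rounded = (sizeof(ThreeFloats) + 7) & ~(size_t)7;    // slot-rounded size
        assert(rounded / hfaFieldSize == 4);                              // would over-count by one
        return 0;
    }
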
- pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16; + pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; pLoc->m_cFloatReg = 1; } else From 78517bdd0b0fdd47cd482bff8dec6a8c4d4efa8d Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 13:27:29 -0800 Subject: [PATCH 15/25] Use StackElemSize for ` pLoc->m_byteStackSize`. --- src/coreclr/vm/callingconvention.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 27828d4bd841a..c40a63e4a0bc8 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -685,7 +685,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { CorInfoHFAElemType type = m_argTypeHandle.GetHFAType(); pLoc->setHFAFieldSize(type); - pLoc->m_cFloatReg = byteArgSize / pLoc->m_hfaFieldSize; + pLoc->m_cFloatReg = GetArgSize() / pLoc->m_hfaFieldSize; } else @@ -780,7 +780,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE argSizeInBytes = TARGET_POINTER_SIZE; else argSizeInBytes = GetArgSize(); - pLoc->m_byteStackSize = argSizeInBytes; + pLoc->m_byteStackSize = StackElemSize(argSizeInBytes); } } #endif // TARGET_AMD64 From 215178a7a228414294a94666e3b97445c48bfc5f Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 13:34:50 -0800 Subject: [PATCH 16/25] replace `assert` with `_ASSERTE`. --- src/coreclr/vm/callingconvention.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index c40a63e4a0bc8..c903e958329ef 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -1274,7 +1274,7 @@ int ArgIteratorTemplate::GetNextOffset() m_fRequires64BitAlignment = fRequiresAlign64Bit; int cbArg = StackElemSize(argSize); - assert((cbArg % TARGET_POINTER_SIZE) == 0); + _ASSERTE((cbArg % TARGET_POINTER_SIZE) == 0); // Ignore floating point argument placement in registers if we're dealing with a vararg function (the ABI // specifies this so that vararg processing on the callee side is simplified). From 35202d52a4cb9d70bb97e540f387678788802ce2 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 13:32:02 -0800 Subject: [PATCH 17/25] Use `ALIGN_UP` in the code that I have changed. 
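For reference, a standalone sketch of a templated ALIGN_UP like the one introduced below, assuming the alignment is a power of two as the original asserts; the slot-count expressions in callingconvention.h then become ALIGN_UP(size, slot) / slot:

    // Standalone sketch, not the CoreCLR inline; assumes power-of-two alignments.
    #include <cassert>
    #include <cstddef>

    inline size_t AlignUpSize(size_t val, size_t alignment)
    {
        const size_t result = (val + (alignment - 1)) & ~(alignment - 1);
        assert(result >= val);   // overflow check, mirroring the original
        return result;
    }

    template <typename T>
    inline T AlignUp(T val, size_t alignment)
    {
        return (T)AlignUpSize((size_t)val, alignment);
    }

    int main()
    {
        assert(AlignUp<size_t>(12, 8) == 16);
        assert(AlignUp<size_t>(12, 8) / 8 == 2);   // 12 bytes occupy two 8-byte slots
        char buf[16];
        char* p = AlignUp(buf + 1, 8);             // the pointer overloads collapse into the template
        assert(((size_t)p % 8) == 0);
        return 0;
    }
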
--- src/coreclr/inc/stdmacros.h | 10 ++-------- src/coreclr/vm/callingconvention.h | 10 +++++----- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/coreclr/inc/stdmacros.h b/src/coreclr/inc/stdmacros.h index 6f1f884b2c7ac..f2417c7637545 100644 --- a/src/coreclr/inc/stdmacros.h +++ b/src/coreclr/inc/stdmacros.h @@ -185,17 +185,11 @@ inline size_t ALIGN_UP( size_t val, size_t alignment ) _ASSERTE( result >= val ); // check for overflow return result; } -inline void* ALIGN_UP( void* val, size_t alignment ) -{ - WRAPPER_NO_CONTRACT; - return (void*) ALIGN_UP( (size_t)val, alignment ); -} -inline uint8_t* ALIGN_UP( uint8_t* val, size_t alignment ) +template inline T ALIGN_UP(T val, size_t alignment) { WRAPPER_NO_CONTRACT; - - return (uint8_t*) ALIGN_UP( (size_t)val, alignment ); + return (T)ALIGN_UP((size_t)val, alignment); } inline size_t ALIGN_DOWN( size_t val, size_t alignment ) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index c903e958329ef..2678f69f6e1e5 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -648,7 +648,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if ((int)byteArgSize <= (4 - pLoc->m_idxGenReg) * TARGET_POINTER_SIZE) { - pLoc->m_cGenReg = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + pLoc->m_cGenReg = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; } else { @@ -696,7 +696,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE } - unsigned cSlots = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + unsigned cSlots = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; // Question: why do not arm and x86 have similar checks? // Composites greater than 16 bytes are passed by reference @@ -714,7 +714,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - pLoc->m_cGenReg = (byteArgSize + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;; + pLoc->m_cGenReg = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;; } else { @@ -1354,7 +1354,7 @@ int ArgIteratorTemplate::GetNextOffset() if (cbArg <= cRemainingRegs * TARGET_POINTER_SIZE) { // Mark the registers just allocated as used. - m_idxGenReg += (cbArg + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + m_idxGenReg += ALIGN_UP(cbArg, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; return argOfs; } @@ -1459,7 +1459,7 @@ int ArgIteratorTemplate::GetNextOffset() #if !defined(OSX_ARM64_ABI) _ASSERTE((cbArg% TARGET_POINTER_SIZE) == 0); #endif - const int regSlots = (cbArg + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + const int regSlots = ALIGN_UP(cbArg, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; // Only x0-x7 are valid argument registers (x8 is always the return buffer) if (m_idxGenReg + regSlots <= 8) { From 2ea716bba4b86e2336aa9ef6be50805add9e8a9d Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 14:18:57 -0800 Subject: [PATCH 18/25] rename `m_curOfs` as `m_ofsStack`. 
--- src/coreclr/vm/callingconvention.h | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 2678f69f6e1e5..a895a4ca656bc 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -800,7 +800,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE bool m_hasArgLocDescForStructInRegs; #endif // (TARGET_AMD64 && UNIX_AMD64_ABI) || TARGET_ARM64 - int m_curOfs; // Current position of the stack iterator, in bytes + int m_ofsStack; // Current position of the stack iterator, in bytes #ifdef TARGET_X86 int m_numRegistersUsed; @@ -1032,7 +1032,7 @@ int ArgIteratorTemplate::GetNextOffset() case IMAGE_CEE_CS_CALLCONV_C: case IMAGE_CEE_CS_CALLCONV_STDCALL: m_numRegistersUsed = NUM_ARGUMENT_REGISTERS; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); m_fUnmanagedCallConv = true; break; @@ -1043,30 +1043,30 @@ int ArgIteratorTemplate::GetNextOffset() default: m_fUnmanagedCallConv = false; m_numRegistersUsed = numRegistersUsed; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); break; } #else m_numRegistersUsed = numRegistersUsed; - m_curOfs = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + SizeOfArgStack(); #endif #elif defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI m_idxGenReg = numRegistersUsed; - m_curOfs = 0; + m_ofsStack = 0; m_idxFPReg = 0; #else - m_curOfs = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); + m_ofsStack = TransitionBlock::GetOffsetOfArgs() + numRegistersUsed * sizeof(void *); #endif #elif defined(TARGET_ARM) m_idxGenReg = numRegistersUsed; - m_curOfs = 0; + m_ofsStack = 0; m_wFPRegs = 0; #elif defined(TARGET_ARM64) m_idxGenReg = numRegistersUsed; - m_curOfs = 0; + m_ofsStack = 0; m_idxFPReg = 0; #else @@ -1099,8 +1099,8 @@ int ArgIteratorTemplate::GetNextOffset() #ifdef FEATURE_INTERPRETER if (m_fUnmanagedCallConv) { - int argOfs = m_curOfs; - m_curOfs += StackElemSize(argSize); + int argOfs = m_ofsStack; + m_ofsStack += StackElemSize(argSize); return argOfs; } #endif @@ -1109,9 +1109,9 @@ int ArgIteratorTemplate::GetNextOffset() return TransitionBlock::GetOffsetOfArgumentRegisters() + (NUM_ARGUMENT_REGISTERS - m_numRegistersUsed) * sizeof(void *); } - m_curOfs -= StackElemSize(argSize); - _ASSERTE(m_curOfs >= TransitionBlock::GetOffsetOfArgs()); - return m_curOfs; + m_ofsStack -= StackElemSize(argSize); + _ASSERTE(m_ofsStack >= TransitionBlock::GetOffsetOfArgs()); + return m_ofsStack; #elif defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI @@ -1204,15 +1204,15 @@ int ArgIteratorTemplate::GetNextOffset() m_fArgInRegisters = false; - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; - m_curOfs += cbArg; + m_ofsStack += cbArg; return argOfs; #else // Each argument takes exactly one slot on AMD64 on Windows - int argOfs = m_curOfs; - m_curOfs += sizeof(void *); + int argOfs = m_ofsStack; + m_ofsStack += sizeof(void *); return argOfs; #endif #elif defined(TARGET_ARM) @@ -1322,14 +1322,14 @@ int ArgIteratorTemplate::GetNextOffset() // Doubles or HFAs containing doubles need the stack aligned appropriately. 
if (fRequiresAlign64Bit) { - m_curOfs = (int)ALIGN_UP(m_curOfs, TARGET_POINTER_SIZE * 2); + m_ofsStack = (int)ALIGN_UP(m_ofsStack, TARGET_POINTER_SIZE * 2); } // Indicate the stack location of the argument to the caller. - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; // Record the stack usage. - m_curOfs += cbArg; + m_ofsStack += cbArg; return argOfs; } @@ -1365,9 +1365,9 @@ int ArgIteratorTemplate::GetNextOffset() m_idxGenReg = 4; - if (m_curOfs == 0) + if (m_ofsStack == 0) { - m_curOfs += cbArg - cRemainingRegs * TARGET_POINTER_SIZE; + m_ofsStack += cbArg - cRemainingRegs * TARGET_POINTER_SIZE; return argOfs; } } @@ -1376,13 +1376,13 @@ int ArgIteratorTemplate::GetNextOffset() { // The argument requires 64-bit alignment. If it is going to be passed on the stack, align // the next stack slot. See step C.6 in the algorithm in the ABI spec. - m_curOfs = (int)ALIGN_UP(m_curOfs, TARGET_POINTER_SIZE * 2); + m_ofsStack = (int)ALIGN_UP(m_ofsStack, TARGET_POINTER_SIZE * 2); } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; // Advance the stack pointer over the argument just placed. - m_curOfs += cbArg; + m_ofsStack += cbArg; return argOfs; #elif defined(TARGET_ARM64) @@ -1479,9 +1479,9 @@ int ArgIteratorTemplate::GetNextOffset() // into x0-x7, and any remaining stack arguments are placed normally. int argOfs = TransitionBlock::GetOffsetOfArgumentRegisters() + m_idxGenReg * 8; - // Increase m_curOfs to account for the space used for the remainder of the arg after + // Increase m_ofsStack to account for the space used for the remainder of the arg after // registers are filled. - m_curOfs += cbArg + (m_idxGenReg - 8) * TARGET_POINTER_SIZE; + m_ofsStack += cbArg + (m_idxGenReg - 8) * TARGET_POINTER_SIZE; // We used up the remaining reg slots. m_idxGenReg = 8; @@ -1497,8 +1497,8 @@ int ArgIteratorTemplate::GetNextOffset() } } - int argOfs = TransitionBlock::GetOffsetOfArgs() + m_curOfs; - m_curOfs += cbArg; + int argOfs = TransitionBlock::GetOffsetOfArgs() + m_ofsStack; + m_ofsStack += cbArg; return argOfs; #else PORTABILITY_ASSERT("ArgIteratorTemplate::GetNextOffset"); From 164f54f14b7bb1fc61bdf842dd23b2550e2b07fd Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 15:18:27 -0800 Subject: [PATCH 19/25] delete "ceremony " from `StackElemSize`. --- src/coreclr/vm/amd64/cgencpu.h | 5 +---- src/coreclr/vm/arm/cgencpu.h | 5 +---- src/coreclr/vm/arm64/cgencpu.h | 5 +---- src/coreclr/vm/i386/cgencpu.h | 5 +---- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 0bb9c506df79d..5c118aafe9c38 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -149,10 +149,7 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - typedef INT64 StackElemType; - const unsigned stackSlotSize = sizeof(StackElemType); - // The next expression assumes STACK_ELEM_SIZE is a power of 2. 
- static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + const unsigned stackSlotSize = sizeof(void*); return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index e063c8f683610..0faa8695f562b 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -103,10 +103,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - typedef INT32 StackElemType; - const unsigned stackSlotSize = sizeof(StackElemType); - // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + const unsigned stackSlotSize = sizeof(void*); return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 5793fc13a8420..a304c4e79b1c3 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -99,10 +99,7 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH } #endif - typedef INT64 StackElemType; - const unsigned stackSlotSize = sizeof(StackElemType); - // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + const unsigned stackSlotSize = sizeof(void*); return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 5da84103a2d8a..389f9094afbd7 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -104,10 +104,7 @@ EXTERN_C void SinglecastDelegateInvokeStub(); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - typedef INT32 StackElemType; - const unsigned stackSlotSize = sizeof(StackElemType); - // The next expression assumes STACK_ELEM_SIZE is a power of 2. - static_assert(((stackSlotSize & (stackSlotSize-1)) == 0), "STACK_ELEM_SIZE must be a power of 2"); + const unsigned stackSlotSize = sizeof(void*); return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); } From cc5838e3d03424a42b6d92a15b863eb298b62712 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 16:18:11 -0800 Subject: [PATCH 20/25] Delete `cSlots` and don't call `StackElemSize` on `GetArgSize`. 
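The idea is that register assignment works from the raw argument size, while only the portion that actually lands on the stack gets slot-rounded. A standalone worked example for an argument split between registers and stack (the constants are placeholders for a 32-bit ARM-like target, not CoreCLR values):

    // Illustrative arithmetic only, not CoreCLR code.
    #include <cassert>

    int main()
    {
        const unsigned slot       = 4;    // general-purpose register / stack slot size
        const unsigned rawArgSize = 12;   // raw size from the signature
        const unsigned regsLeft   = 1;    // only one argument register still free

        const unsigned roundedSize = (rawArgSize + slot - 1) & ~(slot - 1);
        const unsigned regBytes    = regsLeft * slot;
        const unsigned stackBytes  = roundedSize - regBytes;

        assert(roundedSize == 12);        // already a multiple of the slot size
        assert(regBytes == 4);            // 4 bytes travel in the last register
        assert(stackBytes == 8);          // the remaining 8 bytes go on the stack
        return 0;
    }
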
--- src/coreclr/vm/callingconvention.h | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index a895a4ca656bc..1cd8fe460f992 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -605,13 +605,10 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); - int cSlots = (GetArgSize() + 3) / 4; - const unsigned byteArgSize = StackElemSize(GetArgSize()); if (!TransitionBlock::IsStackArgumentOffset(argOffset)) { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - _ASSERTE(cSlots == 1); - _ASSERTE(byteArgSize <= TARGET_POINTER_SIZE); + _ASSERTE(GetArgSize() <= TARGET_POINTER_SIZE); pLoc->m_cGenReg = 1; } else @@ -632,13 +629,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->m_fRequires64BitAlignment = m_fRequires64BitAlignment; - const unsigned byteArgSize = StackElemSize(GetArgSize()); - int cSlots = (GetArgSize() + 3) / 4; + const int byteArgSize = GetArgSize(); if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; - _ASSERTE(byteArgSize / FLOAT_REGISTER_SIZE == cSlots); - pLoc->m_cFloatReg = byteArgSize / FLOAT_REGISTER_SIZE; + pLoc->m_cFloatReg = ALIGN_UP(byteArgSize, FLOAT_REGISTER_SIZE) / FLOAT_REGISTER_SIZE; return; } @@ -646,7 +643,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { pLoc->m_idxGenReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); - if ((int)byteArgSize <= (4 - pLoc->m_idxGenReg) * TARGET_POINTER_SIZE) + if (byteArgSize <= (4 - pLoc->m_idxGenReg) * TARGET_POINTER_SIZE) { pLoc->m_cGenReg = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; } @@ -654,13 +651,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { pLoc->m_cGenReg = 4 - pLoc->m_idxGenReg; pLoc->m_byteStackIndex = 0; - pLoc->m_byteStackSize = byteArgSize - pLoc->m_cGenReg * TARGET_POINTER_SIZE; + pLoc->m_byteStackSize = StackElemSize(byteArgSize) - pLoc->m_cGenReg * TARGET_POINTER_SIZE; } } else { pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); - pLoc->m_byteStackSize = byteArgSize; + pLoc->m_byteStackSize = StackElemSize(byteArgSize); } } #endif // TARGET_ARM @@ -673,9 +670,6 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->Init(); - const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); - unsigned byteArgSize = StackElemSize(GetArgSize(), isValueType, m_argTypeHandle.IsFloatHfa()); - if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { @@ -695,15 +689,13 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE return; } - - unsigned cSlots = ALIGN_UP(byteArgSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; + unsigned byteArgSize = GetArgSize(); // Question: why do not arm and x86 have similar checks? 
// Composites greater than 16 bytes are passed by reference if ((GetArgType() == ELEMENT_TYPE_VALUETYPE) && (byteArgSize > ENREGISTERED_PARAMTYPE_MAXSIZE)) { byteArgSize = TARGET_POINTER_SIZE; - cSlots = 1; } @@ -719,7 +711,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE else { pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); - pLoc->m_byteStackSize = byteArgSize; + const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); + const bool isFloatHfa = (isValueType && !m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()); + pLoc->m_byteStackSize = StackElemSize(byteArgSize, isValueType, isFloatHfa); } } #endif // TARGET_ARM64 From 9948f4bcef0855d14946146d3732dfee16530b35 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 20:28:07 -0800 Subject: [PATCH 21/25] Fix an assert. --- src/coreclr/vm/arm64/cgencpu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index a304c4e79b1c3..af15ec94da116 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -88,8 +88,9 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH #if defined(OSX_ARM64_ABI) if (!isValueType) { + // The primitive types' sizes are expected to be powers of 2. + _ASSERTE((parmSize & (parmSize - 1)) == 0); // No padding/alignment for primitive types. - _ASSERTE((((parmSize & (parmSize - 1)) == 0)); return parmSize; } if (isFloatHfa) From 80d16c34293f9863adbc719a4608246a76dd2908 Mon Sep 17 00:00:00 2001 From: Sergey Date: Tue, 19 Jan 2021 21:02:29 -0800 Subject: [PATCH 22/25] Fix nit. --- src/coreclr/vm/amd64/cgencpu.h | 2 +- src/coreclr/vm/arm/cgencpu.h | 2 +- src/coreclr/vm/arm64/cgencpu.h | 2 +- src/coreclr/vm/i386/cgencpu.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 5c118aafe9c38..4f40b0cf0a635 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -150,7 +150,7 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { const unsigned stackSlotSize = sizeof(void*); - return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); + return ALIGN_UP(parmSize, stackSlotSize); } //********************************************************************** diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 0faa8695f562b..68c70cda7b817 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -104,7 +104,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { const unsigned stackSlotSize = sizeof(void*); - return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); + return ALIGN_UP(parmSize, stackSlotSize); } //********************************************************************** diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index af15ec94da116..c7d9d032d5cfc 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -101,7 +101,7 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH #endif const unsigned stackSlotSize = sizeof(void*); - return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); + return ALIGN_UP(parmSize, 
stackSlotSize); } // diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 389f9094afbd7..3c7d700e8d953 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -105,7 +105,7 @@ EXTERN_C void SinglecastDelegateInvokeStub(); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { const unsigned stackSlotSize = sizeof(void*); - return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1); + return ALIGN_UP(parmSize, stackSlotSize); } #include "stublinkerx86.h" From e42895f2645f9d54e0ff42ef42541bab0db1a8a3 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 20 Jan 2021 01:15:31 -0800 Subject: [PATCH 23/25] fix wrong return for hfa. --- src/coreclr/vm/arm64/cgencpu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index c7d9d032d5cfc..42bf8038e716c 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -95,8 +95,9 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH } if (isFloatHfa) { + _ASSERTE((parmSize % 4) == 0); // float hfa is not considered a struct type and passed with 4-byte alignment. - return sizeof(float); + return parmSize; } #endif From 2b9aa39ec15929e3d843599668c7ef79be07edb3 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 20 Jan 2021 11:18:38 -0800 Subject: [PATCH 24/25] fix nit. --- src/coreclr/vm/callingconvention.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 1cd8fe460f992..8a1e934d3d78d 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -634,7 +634,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; pLoc->m_cFloatReg = ALIGN_UP(byteArgSize, FLOAT_REGISTER_SIZE) / FLOAT_REGISTER_SIZE; return; } @@ -673,7 +673,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()) { @@ -745,7 +747,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE #if defined(UNIX_AMD64_ABI) if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset)) { - pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / FLOAT_REGISTER_SIZE; + const int floatRegOfsInBytes = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters(); + _ASSERTE((floatRegOfsInBytes % FLOAT_REGISTER_SIZE) == 0); + pLoc->m_idxFloatReg = floatRegOfsInBytes / FLOAT_REGISTER_SIZE; pLoc->m_cFloatReg = 1; } else From e6c5cc60de2762627f28533aa5d43f4179c6816b Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 20 Jan 2021 17:07:25 -0800 Subject: [PATCH 25/25] Fix crossgen job. 
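Presumably the issue is that sizeof(void*) is evaluated for the host, so a cross-targeting compiler (for example an x64-hosted crossgen producing 32-bit code) would round to the wrong slot size; the change below pins the slot size per target instead. A standalone illustration of the mismatch (numbers are examples, not CoreCLR constants):

    // Illustrative only: host pointer size vs. the slot size the target needs.
    #include <cassert>

    inline unsigned RoundUp(unsigned size, unsigned slot)
    {
        return (size + slot - 1) & ~(slot - 1);
    }

    int main()
    {
        const unsigned hostSlot   = sizeof(void*);  // e.g. 8 on an x64 build machine
        const unsigned targetSlot = 4;              // what a 32-bit target actually reserves

        assert(RoundUp(2, targetSlot) == 4);        // correct for the 32-bit target
        // RoundUp(2, hostSlot) would be 8 on a 64-bit host, twice the target's slot.
        (void)hostSlot;
        return 0;
    }
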
--- src/coreclr/classlibnative/bcltype/varargsnative.cpp | 2 +- src/coreclr/vm/amd64/cgencpu.h | 2 +- src/coreclr/vm/arm/cgencpu.h | 2 +- src/coreclr/vm/arm64/cgencpu.h | 2 +- src/coreclr/vm/i386/cgencpu.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/classlibnative/bcltype/varargsnative.cpp index a53c0b09ba55a..a5f206aeef5de 100644 --- a/src/coreclr/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/classlibnative/bcltype/varargsnative.cpp @@ -116,7 +116,7 @@ static void InitCommon(VARARGS *data, VASigCookie** cookie) // Always skip over the varargs_cookie. const bool isValueType = false; const bool isFloatHfa = false; - data->ArgPtr += StackElemSize(sizeof(LPVOID), isValueType, isFloatHfa); + data->ArgPtr += StackElemSize(TARGET_POINTER_SIZE, isValueType, isFloatHfa); #endif } diff --git a/src/coreclr/vm/amd64/cgencpu.h b/src/coreclr/vm/amd64/cgencpu.h index 4f40b0cf0a635..7312ad0a019fe 100644 --- a/src/coreclr/vm/amd64/cgencpu.h +++ b/src/coreclr/vm/amd64/cgencpu.h @@ -149,7 +149,7 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T) inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - const unsigned stackSlotSize = sizeof(void*); + const unsigned stackSlotSize = 8; return ALIGN_UP(parmSize, stackSlotSize); } diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 68c70cda7b817..c68e763e8945d 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -103,7 +103,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - const unsigned stackSlotSize = sizeof(void*); + const unsigned stackSlotSize = 4; return ALIGN_UP(parmSize, stackSlotSize); } diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 42bf8038e716c..c87dbb11601f8 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -101,7 +101,7 @@ inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatH } #endif - const unsigned stackSlotSize = sizeof(void*); + const unsigned stackSlotSize = 8; return ALIGN_UP(parmSize, stackSlotSize); } diff --git a/src/coreclr/vm/i386/cgencpu.h b/src/coreclr/vm/i386/cgencpu.h index 3c7d700e8d953..04de3f584ae9e 100644 --- a/src/coreclr/vm/i386/cgencpu.h +++ b/src/coreclr/vm/i386/cgencpu.h @@ -104,7 +104,7 @@ EXTERN_C void SinglecastDelegateInvokeStub(); inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */) { - const unsigned stackSlotSize = sizeof(void*); + const unsigned stackSlotSize = 4; return ALIGN_UP(parmSize, stackSlotSize); }
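
Taken together, the series leaves StackElemSize with roughly the following shape on the Apple arm64 ABI compared with the other targets. This is a standalone sketch for illustration, not the CoreCLR function:

    // Standalone sketch of the end state: on the Apple arm64 ABI primitives and
    // float HFAs keep their natural size, everything else still rounds up to an
    // 8-byte stack slot.
    #include <cassert>

    inline unsigned SketchStackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa)
    {
        const bool appleArm64 = true;   // stands in for OSX_ARM64_ABI being defined
        if (appleArm64)
        {
            if (!isValueType)
            {
                assert((parmSize & (parmSize - 1)) == 0);   // primitive sizes are powers of two
                return parmSize;                            // no padding for primitives
            }
            if (isFloatHfa)
            {
                assert((parmSize % 4) == 0);                // float HFAs stay 4-byte aligned
                return parmSize;
            }
        }
        const unsigned stackSlotSize = 8;
        return (parmSize + stackSlotSize - 1) & ~(stackSlotSize - 1);
    }

    int main()
    {
        assert(SketchStackElemSize(2, false, false) == 2);   // short packs tightly
        assert(SketchStackElemSize(12, true, true) == 12);   // HFA of three floats
        assert(SketchStackElemSize(12, true, false) == 16);  // plain 12-byte struct
        return 0;
    }

The OSX_ARM64_ABI guard in arm64/cgencpu.h keeps the tighter packing confined to Apple targets; the other architectures keep plain slot rounding.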