Skip to content

Commit

Permalink
Arm64 apple vm fixes for arg alignment. (#46665)
Browse files Browse the repository at this point in the history
* Add `MarshalInfo::IsValueClass`.

* Add `TypeHandle* pTypeHandle` to `SizeOf`.

* Add a few asserts/start using inline function instead of macro.

* use TARGET_POINTER_SIZE instead of STACK_ELEM_SIZE.

* Use `m_curOfs` instead of `m_idxStack` in `ArgIteratorBase` on all platforms.

Before some platforms were using stackSlots, some curOfs in bytes.

* Use byte sizes and offsets in `ArgLocDesc`.

Fix arm32.

x86 fixes.

use StackSize on ArgSizes

Add `GetStackArgumentByteIndexFromOffset` and return back the old values for asserts.

another fix

* Stop using `#define STACK_ELEM_SIZE`

* Add `isFloatHfa`.

* delete checking code.

because it won't pass on arm64 apple.

* arm64 apple fixes.

* roundUp the stack size.

* Add a reflection test.

* Return byte offset from `GetNextOfs`.

It is not a complete fix for arm64 apple, but covers most cases.

* Add `FLOAT_REGISTER_SIZE`

* Use StackElemSize for ` pLoc->m_byteStackSize`.

* replace `assert` with `_ASSERTE`.

* Use `ALIGN_UP` in the code that I have changed.

* rename `m_curOfs` as `m_ofsStack`.

* delete "ceremony " from `StackElemSize`.

* Delete `cSlots` and don't call `StackElemSize` on `GetArgSize`.

* Fix an assert.

* Fix nit.

* fix wrong return for hfa<float>.

* fix nit.

* Fix crossgen job.
  • Loading branch information
Sergey Andreenko authored Jan 21, 2021
1 parent bf573c7 commit 648437b
Show file tree
Hide file tree
Showing 23 changed files with 385 additions and 195 deletions.
39 changes: 24 additions & 15 deletions src/coreclr/classlibnative/bcltype/varargsnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
// pointer to achieve such an alignment for the next argument on those platforms (otherwise it is a no-op).
// NOTE: the debugger has its own implementation of this algorithm in Debug\DI\RsType.cpp, CordbType::RequiresAlign8()
// so if you change this implementation be sure to update the debugger's version as well.
static void AdjustArgPtrForAlignment(VARARGS *pData, size_t cbArg)
static void AdjustArgPtrForAlignment(VARARGS *pData, unsigned cbArg)
{
#ifdef TARGET_ARM
// Only 64-bit primitives or value types with embedded 64-bit primitives are aligned on 64-bit boundaries.
Expand Down Expand Up @@ -114,7 +114,9 @@ static void InitCommon(VARARGS *data, VASigCookie** cookie)
// which is the start of the first fixed arg (arg1).

// Always skip over the varargs_cookie.
data->ArgPtr += StackElemSize(sizeof(LPVOID));
const bool isValueType = false;
const bool isFloatHfa = false;
data->ArgPtr += StackElemSize(TARGET_POINTER_SIZE, isValueType, isFloatHfa);
#endif
}

Expand All @@ -138,9 +140,11 @@ void AdvanceArgPtr(VARARGS *data)
break;

SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic
SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext);
SIZE_T cbArg = StackElemSize(cbRaw);

TypeHandle thValueType;
const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule, &typeContext, &thValueType);
const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType());
const bool isFloatHfa = false;
unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa);
#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE
if (ArgIterator::IsVarArgPassedByRef(cbRaw))
cbArg = sizeof(void*);
Expand Down Expand Up @@ -263,9 +267,11 @@ VarArgsNative::Init2,
}

SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic
SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext);
SIZE_T cbArg = StackElemSize(cbRaw);

TypeHandle thValueType;
unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType);
const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType());
const bool isFloatHfa = false;
unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa);
#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE
if (ArgIterator::IsVarArgPassedByRef(cbRaw))
cbArg = sizeof(void*);
Expand Down Expand Up @@ -401,7 +407,8 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC
TypeHandle typehandle = refType->GetType();

_ASSERTE(_this != NULL);
UINT size = 0;
unsigned size = 0;
bool isValueType = false;

CorElementType typ = typehandle.GetInternalCorElementType();
if (CorTypeInfo::IsPrimitiveType(typ))
Expand All @@ -414,15 +421,15 @@ FCIMPL3(void, VarArgsNative::GetNextArg2, VARARGS* _this, void * value, ReflectC
}
else if (typ == ELEMENT_TYPE_VALUETYPE)
{
isValueType = true;
size = typehandle.AsMethodTable()->GetNativeSize();
}
else
{
COMPlusThrow(kNotSupportedException, W("NotSupported_Type"));
}

size = StackElemSize(size);

const bool isFloatHfa = false;
size = StackElemSize(size, isValueType, isFloatHfa);
AdjustArgPtrForAlignment(_this, size);

#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE
Expand Down Expand Up @@ -472,9 +479,11 @@ VarArgsNative::GetNextArgHelper(
_ASSERTE(data->RemainingArgs != 0);

SigTypeContext typeContext; // This is an empty type context. This is OK because the vararg methods may not be generic
SIZE_T cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext);
SIZE_T cbArg = StackElemSize(cbRaw);

TypeHandle thValueType;
const unsigned cbRaw = data->SigPtr.SizeOf(data->ArgCookie->pModule,&typeContext, &thValueType);
const bool isValueType = (!thValueType.IsNull() && thValueType.IsValueType());
const bool isFloatHfa = false;
unsigned cbArg = StackElemSize(cbRaw, isValueType, isFloatHfa);
AdjustArgPtrForAlignment(data, cbArg);

// Get a pointer to the beginning of the argument.
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/gcinfo/gcinfoencoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,8 +577,10 @@ GcSlotId GcInfoEncoder::GetStackSlotId( INT32 spOffset, GcSlotFlags flags, GcSta

_ASSERTE( (flags & (GC_SLOT_IS_REGISTER | GC_SLOT_IS_DELETED)) == 0 );

#if !defined(OSX_ARM64_ABI)
// the spOffset for the stack slot is required to be pointer size aligned
_ASSERTE((spOffset % TARGET_POINTER_SIZE) == 0);
#endif

m_SlotTable[ m_NumSlots ].Slot.Stack.SpOffset = spOffset;
m_SlotTable[ m_NumSlots ].Slot.Stack.Base = spBase;
Expand Down
10 changes: 2 additions & 8 deletions src/coreclr/inc/stdmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,17 +185,11 @@ inline size_t ALIGN_UP( size_t val, size_t alignment )
_ASSERTE( result >= val ); // check for overflow
return result;
}
inline void* ALIGN_UP( void* val, size_t alignment )
{
WRAPPER_NO_CONTRACT;

return (void*) ALIGN_UP( (size_t)val, alignment );
}
inline uint8_t* ALIGN_UP( uint8_t* val, size_t alignment )
template <typename T> inline T ALIGN_UP(T val, size_t alignment)
{
WRAPPER_NO_CONTRACT;

return (uint8_t*) ALIGN_UP( (size_t)val, alignment );
return (T)ALIGN_UP((size_t)val, alignment);
}

inline size_t ALIGN_DOWN( size_t val, size_t alignment )
Expand Down
12 changes: 7 additions & 5 deletions src/coreclr/vm/amd64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr);
#define X86RegFromAMD64Reg(extended_reg) \
((X86Reg)(((int)extended_reg) & X86_REGISTER_MASK))

#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes.

// Why is the return value ARG_SLOT? On 64-bit systems, that is 64-bits
// and much bigger than necessary for R4, requiring explicit downcasts.
inline
Expand Down Expand Up @@ -145,11 +147,11 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T)
// Parameter size
//**********************************************************************

typedef INT64 StackElemType;
#define STACK_ELEM_SIZE sizeof(StackElemType)

// !! This expression assumes STACK_ELEM_SIZE is a power of 2.
#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */)
{
const unsigned stackSlotSize = 8;
return ALIGN_UP(parmSize, stackSlotSize);
}

//**********************************************************************
// Frames
Expand Down
12 changes: 7 additions & 5 deletions src/coreclr/vm/arm/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,17 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
// Offset of pc register
#define PC_REG_RELATIVE_OFFSET 4

#define FLOAT_REGISTER_SIZE 4 // each register in FloatArgumentRegisters is 4 bytes.

//**********************************************************************
// Parameter size
//**********************************************************************

typedef INT32 StackElemType;
#define STACK_ELEM_SIZE sizeof(StackElemType)

// !! This expression assumes STACK_ELEM_SIZE is a power of 2.
#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
inline unsigned StackElemSize(unsigned parmSize, bool isValueType = false /* unused */, bool isFloatHfa = false /* unused */)
{
const unsigned stackSlotSize = 4;
return ALIGN_UP(parmSize, stackSlotSize);
}

//**********************************************************************
// Frames
Expand Down
28 changes: 22 additions & 6 deletions src/coreclr/vm/arm64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ extern PCODE GetPreStubEntryPoint();
#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter
#define CALLDESCR_RETBUFFARGREG 1 // CallDescrWorker has RetBuffArg parameter that's separate from arg regs

#define FLOAT_REGISTER_SIZE 16 // each register in FloatArgumentRegisters is 16 bytes.

// Given a return address retrieved during stackwalk,
// this is the offset by which it should be decremented to arrive at the callsite.
#define STACKWALK_CONTROLPC_ADJUST_OFFSET 4
Expand All @@ -81,13 +83,27 @@ void R8ToFPSpill(void* pSpillSlot, SIZE_T srcDoubleAsSIZE_T)
// Parameter size
//**********************************************************************

typedef INT64 StackElemType;
#define STACK_ELEM_SIZE sizeof(StackElemType)

// The expression below assumes STACK_ELEM_SIZE is a power of 2, so check that.
static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE must be a power of 2");
inline unsigned StackElemSize(unsigned parmSize, bool isValueType, bool isFloatHfa)
{
#if defined(OSX_ARM64_ABI)
if (!isValueType)
{
// The primitive types' sizes are expected to be powers of 2.
_ASSERTE((parmSize & (parmSize - 1)) == 0);
// No padding/alignment for primitive types.
return parmSize;
}
if (isFloatHfa)
{
_ASSERTE((parmSize % 4) == 0);
// float hfa is not considered a struct type and passed with 4-byte alignment.
return parmSize;
}
#endif

#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
const unsigned stackSlotSize = 8;
return ALIGN_UP(parmSize, stackSlotSize);
}

//
// JIT HELPERS.
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/vm/callhelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,8 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT *
CallDescrData callDescrData;

callDescrData.pSrc = pTransitionBlock + sizeof(TransitionBlock);
callDescrData.numStackSlots = nStackBytes / STACK_ELEM_SIZE;
_ASSERTE((nStackBytes % TARGET_POINTER_SIZE) == 0);
callDescrData.numStackSlots = nStackBytes / TARGET_POINTER_SIZE;
#ifdef CALLDESCR_ARGREGS
callDescrData.pArgumentRegisters = (ArgumentRegisters*)(pTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters());
#endif
Expand Down
Loading

0 comments on commit 648437b

Please sign in to comment.