Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
[x86/Linux] Stack align 16 bytes for JIT code
Browse files Browse the repository at this point in the history
Change the JIT code to keep the stack 16-byte aligned, as modern compilers expect
  • Loading branch information
seanshpark committed Jan 24, 2017
1 parent d8b995b commit 0a47b2f
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 46 deletions.
9 changes: 6 additions & 3 deletions src/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3146,9 +3146,12 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
noway_assert(getEmitter()->emitMaxStackDepth <=
(compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
#if defined(UNIX_X86_ABI)
(genTypeStSz(TYP_INT) * 3) + // stack align may increase maximum value of 3
#endif
(compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
#endif

Expand Down
44 changes: 30 additions & 14 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2635,16 +2635,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);

#if defined(_TARGET_AMD64_)
// Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
#elif defined(_TARGET_X86_)
// Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
inst_IV(INS_push_hide, 0); // --- push 4-byte 0
#endif // _TARGET_X86_
static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);

for (unsigned i = 0; i < count; i++)
{
inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
}
// Note that the stack must always be aligned to STACK_ALIGN bytes

// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
Expand Down Expand Up @@ -4884,9 +4882,8 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
{
#endif // FEATURE_PUT_STRUCT_ARG_STK

{
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
Expand Down Expand Up @@ -5125,6 +5122,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}

#if defined(UNIX_X86_ABI)
// put back the stack pointer before padding for aligned stack
unsigned padStackAlign = call->fgArgInfo->GetPadStkAlign();
if (padStackAlign != 0)
{
inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif // UNIX_X86_ABI

// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
Expand Down Expand Up @@ -7774,6 +7780,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)

#ifdef _TARGET_X86_

#if defined(UNIX_X86_ABI)
// For each call, first stack argument has the padding for alignment
// if this value is not zero, use it adjust the ESP
unsigned argPadding = putArgStk->getArgPadding();
if (argPadding != 0)
{
inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif

#ifdef FEATURE_SIMD
if (targetType == TYP_SIMD12)
{
Expand Down Expand Up @@ -8091,7 +8107,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}

const unsigned offset = i * 4;
const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
Expand All @@ -8100,7 +8116,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
genStackLevel += 4;
genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)

Expand Down
16 changes: 16 additions & 0 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,9 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // num of padding slots for stack alignment
#endif

bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
Expand Down Expand Up @@ -1270,6 +1273,9 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // Count of padding for stack alignment
#endif

unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
Expand Down Expand Up @@ -1319,6 +1325,10 @@ class fgArgInfo

void ArgsComplete();

#if defined(UNIX_X86_ABI)
void ArgsAlignPadding();
#endif

void SortArgs();

void EvalArgsToTemps();
Expand All @@ -1338,6 +1348,12 @@ class fgArgInfo
{
return nextSlotNum;
}
#if defined(UNIX_X86_ABI)
// Returns the number of padding slots recorded for this call's stack arguments.
// The value starts at zero and is set by ArgsAlignPadding(); code generation
// multiplies it by TARGET_POINTER_SIZE to restore the stack pointer after the call.
unsigned GetPadStkAlign()
{
return padStkAlign;
}
#endif
bool HasRegArgs()
{
return hasRegArgs;
Expand Down
12 changes: 12 additions & 0 deletions src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4625,10 +4625,22 @@ struct GenTreePutArgStk : public GenTreeUnOp
}
#endif // FEATURE_FASTTAILCALL

// Byte offset of this argument, derived from its slot number.
unsigned getArgOffset()
{
#if defined(UNIX_X86_ABI)
    // On this ABI gtSlotNum also carries the alignment padding count in the bits
    // at and above STACK_ALIGN_PADDING, so only the masked slot index is scaled.
    return (gtSlotNum & STACK_ALIGN_STKOFFSET) * TARGET_POINTER_SIZE;
#else
    return gtSlotNum * TARGET_POINTER_SIZE;
#endif
}

#if defined(UNIX_X86_ABI)
// Number of padding slots packed into gtSlotNum above the slot index.
unsigned getArgPadding()
{
    return gtSlotNum >> STACK_ALIGN_PADDING;
}
#endif // UNIX_X86_ABI

#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
Expand Down
25 changes: 25 additions & 0 deletions src/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5794,6 +5794,7 @@ void Compiler::lvaAlignFrame()

#elif defined(_TARGET_X86_)

#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignement for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
Expand All @@ -5804,6 +5805,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
#endif

if (STACK_ALIGN > REGSIZE_BYTES)
{
if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
{
// If we are not doing final layout, we don't know the exact value of compLclFrameSize
// and thus do not know how much we will need to add in order to be aligned.
// We add the maximum pad that we could ever have (which is 12)
lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
}

// The stack must always be 16 byte aligned.
int adjustFrameSize = compLclFrameSize;
#if defined(UNIX_X86_ABI)
// we need to consider spilled register(s) plus return address and/or EBP
int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
#endif
if ((adjustFrameSize % STACK_ALIGN) != 0)
{
lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
}
}

#else
NYI("TARGET specific lvaAlignFrame");
Expand Down
11 changes: 8 additions & 3 deletions src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -933,15 +933,20 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP

PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
// correct
#if defined(UNIX_X86_ABI)
int slotNum = info->slotNum + (info->padStkAlign << STACK_ALIGN_PADDING);
#else
int slotNum = info->slotNum;
#endif

#if FEATURE_FASTTAILCALL
putArg = new (comp, GT_PUTARG_STK)
GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
GenTreePutArgStk(GT_PUTARG_STK, type, arg, slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
call->IsFastTailCall() DEBUGARG(call));
#else
putArg = new (comp, GT_PUTARG_STK)
putArg = new (comp, GT_PUTARG_STK)
GenTreePutArgStk(GT_PUTARG_STK, type, arg,
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif

#ifdef FEATURE_PUT_STRUCT_ARG_STK
Expand Down
103 changes: 77 additions & 26 deletions src/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
compiler = comp;
callTree = call;
assert(call->IsCall());
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = 0;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = 0;
#if defined(UNIX_X86_ABI)
padStkAlign = 0;
#endif
argTableSize = numArgs; // the allocated table size

hasRegArgs = false;
Expand Down Expand Up @@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
;
callTree = newCall;
assert(newCall->IsCall());
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = oldArgInfo->stkLevel;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
padStkAlign = oldArgInfo->padStkAlign;
#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
argTable = nullptr;
Expand Down Expand Up @@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->regNum = regNum;
curArgTabEntry->slotNum = 0;
curArgTabEntry->numRegs = numRegs;
curArgTabEntry->numSlots = 0;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->regNum = regNum;
curArgTabEntry->slotNum = 0;
curArgTabEntry->numRegs = numRegs;
curArgTabEntry->numSlots = 0;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
#if defined(UNIX_X86_ABI)
curArgTabEntry->padStkAlign = 0;
#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
Expand Down Expand Up @@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->regNum = REG_STK;
curArgTabEntry->slotNum = nextSlotNum;
curArgTabEntry->numRegs = 0;
curArgTabEntry->numSlots = numSlots;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->argNum = argNum;
curArgTabEntry->node = node;
curArgTabEntry->parent = parent;
curArgTabEntry->regNum = REG_STK;
curArgTabEntry->slotNum = nextSlotNum;
curArgTabEntry->numRegs = 0;
curArgTabEntry->numSlots = numSlots;
curArgTabEntry->alignment = alignment;
curArgTabEntry->lateArgInx = (unsigned)-1;
curArgTabEntry->tmpNum = (unsigned)-1;
#if defined(UNIX_X86_ABI)
curArgTabEntry->padStkAlign = 0;
#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
Expand Down Expand Up @@ -1689,6 +1701,40 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}

#if defined(UNIX_X86_ABI)
//------------------------------------------------------------------------
// ArgsAlignPadding: Compute the padding slots needed so that the slots used
//    by this call's stack arguments add up to a multiple of STACK_ALIGN bytes.
//
// Notes:
//    The slot counts of every argTable entry that occupies stack slots are
//    summed. The number of slots required to round that total up to a
//    STACK_ALIGN boundary is stored on the first such entry and mirrored in
//    fgArgInfo::padStkAlign. If no entry uses stack slots, nothing is written.
//
void fgArgInfo::ArgsAlignPadding()
{
    // Number of pointer-sized slots in one STACK_ALIGN unit.
    const int slotsPerAlignUnit = STACK_ALIGN / TARGET_POINTER_SIZE;

    unsigned         totalStkSlots = 0;
    fgArgTabEntryPtr padOwner      = nullptr;

    for (unsigned argInx = 0; argInx < argCount; argInx++)
    {
        fgArgTabEntryPtr argEntry = argTable[argInx];

        if (argEntry->numSlots > 0)
        {
            // Such an entry may be a REG_STK argument, a constant, or a register value that goes to the stack.
            assert(nextSlotNum >= argEntry->slotNum);

            if (padOwner == nullptr)
            {
                // The first entry with stack slots is the one that carries the padding.
                padOwner = argEntry;
            }
            totalStkSlots += argEntry->numSlots;
        }
    }

    if (padOwner == nullptr)
    {
        // No stack arguments: leave the padding fields untouched.
        return;
    }

    // Slots needed to reach the next alignment boundary (zero when already aligned).
    const unsigned padSlots = (slotsPerAlignUnit - (totalStkSlots % slotsPerAlignUnit)) % slotsPerAlignUnit;

    padOwner->padStkAlign = padSlots;
    this->padStkAlign     = padSlots;
}
#endif // UNIX_X86_ABI

void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
Expand Down Expand Up @@ -4211,6 +4257,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!reMorphing)
{
call->fgArgInfo->ArgsComplete();

#if defined(UNIX_X86_ABI)
call->fgArgInfo->ArgsAlignPadding();
#endif // UNIX_X86_ABI

#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
Expand Down
8 changes: 8 additions & 0 deletions src/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,9 +495,17 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.

#define CODE_ALIGN 1 // code alignment requirement
#if !defined(UNIX_X86_ABI)
  #define STACK_ALIGN              4          // stack alignment requirement
  #define STACK_ALIGN_SHIFT        2          // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
  #define STACK_ALIGN_SHIFT_ALL    2          // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
#else
  #define STACK_ALIGN              16         // stack alignment requirement
  // NOTE(review): a shift of 4 converts bytes to 16-byte units, not to 4-byte DWORD_PTRs as the
  // comment says. The value is left unchanged here; confirm what users of STACK_ALIGN_SHIFT expect.
  #define STACK_ALIGN_SHIFT        4          // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
  #define STACK_ALIGN_SHIFT_ALL    4          // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units

  // A stack-arg slot number is packed together with its alignment padding count as
  //     slotNum + (padding << STACK_ALIGN_PADDING)
  // STACK_ALIGN_PADDING is the bit position where the padding count starts, and
  // STACK_ALIGN_STKOFFSET masks off everything at or above that position. The mask must
  // cover exactly the low STACK_ALIGN_PADDING bits (0xFFFF); a wider mask would let the
  // lowest padding bit leak into the decoded slot number.
  #define STACK_ALIGN_PADDING      16         // Shift amount separating the padding count from the slot number
  #define STACK_ALIGN_STKOFFSET    ((1<<STACK_ALIGN_PADDING)-1)
#endif // !UNIX_X86_ABI

#define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
#define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)
Expand Down

0 comments on commit 0a47b2f

Please sign in to comment.