Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
[x86/Linux] Stack align 16 bytes for JIT code
Browse files Browse the repository at this point in the history
Change the JIT-generated code to keep the stack aligned to 16 bytes, as modern compilers do
  • Loading branch information
seanshpark committed Feb 3, 2017
1 parent 0dedbd5 commit 6f4f1de
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 44 deletions.
15 changes: 10 additions & 5 deletions src/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3165,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
noway_assert(getEmitter()->emitMaxStackDepth <=
(compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
{
unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
#if defined(UNIX_X86_ABI)
maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding
#endif
noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
}
#endif // EMIT_TRACK_STACK_DEPTH

*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
Expand Down
43 changes: 30 additions & 13 deletions src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2637,16 +2637,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);

#if defined(_TARGET_AMD64_)
// Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
inst_IV(INS_push_hide, 0); // --- push 8-byte 0
#elif defined(_TARGET_X86_)
// Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
inst_IV(INS_push_hide, 0); // --- push 4-byte 0
#endif // _TARGET_X86_
static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);

for (unsigned i = 0; i < count; i++)
{
inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0
}
// Note that the stack must always be aligned to STACK_ALIGN bytes

// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
Expand Down Expand Up @@ -4886,9 +4884,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
{
#endif // FEATURE_PUT_STRUCT_ARG_STK

{
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
Expand Down Expand Up @@ -5127,6 +5125,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}

#if defined(UNIX_X86_ABI)
// Put back the stack pointer if there was any padding for stack alignment
unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
if (padStackAlign != 0)
{
inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif // UNIX_X86_ABI

// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
Expand Down Expand Up @@ -7715,6 +7722,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)

#ifdef _TARGET_X86_

#if defined(UNIX_X86_ABI)
// For each call, the first stack argument carries the padding needed for alignment.
// If this value is not zero, use it to adjust ESP.
unsigned argPadding = putArgStk->getArgPadding();
if (argPadding != 0)
{
inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
#endif

#ifdef FEATURE_SIMD
if (targetType == TYP_SIMD12)
{
Expand Down Expand Up @@ -8032,7 +8049,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}

const unsigned offset = i * 4;
const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
Expand All @@ -8041,7 +8058,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
genStackLevel += 4;
genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)

Expand Down
19 changes: 19 additions & 0 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,11 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // Count of number of padding slots for stack alignment. For each Call, only the first
// argument may have a value to emit "sub esp, n" to adjust the stack before pushing
// the argument.
#endif

bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
Expand Down Expand Up @@ -1262,6 +1267,10 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
#if defined(UNIX_X86_ABI)
unsigned padStkAlign; // Count of number of padding slots for stack alignment. This value is used to turn back
// stack pointer before it was adjusted after each Call
#endif

unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
Expand Down Expand Up @@ -1311,6 +1320,10 @@ class fgArgInfo

void ArgsComplete();

#if defined(UNIX_X86_ABI)
void ArgsAlignPadding();
#endif

void SortArgs();

void EvalArgsToTemps();
Expand All @@ -1330,6 +1343,12 @@ class fgArgInfo
{
return nextSlotNum;
}
#if defined(UNIX_X86_ABI)
// Returns padStkAlign, the count of padding slots recorded for stack alignment
// of this call. The value is a slot count, not a byte count: the code generator
// scales it by TARGET_POINTER_SIZE when restoring the stack pointer after the call.
unsigned GetPadStackAlign()
{
    const unsigned padSlotCount = padStkAlign;
    return padSlotCount;
}
#endif
bool HasRegArgs()
{
return hasRegArgs;
Expand Down
27 changes: 27 additions & 0 deletions src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -4546,6 +4546,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
struct GenTreePutArgStk : public GenTreeUnOp
{
unsigned gtSlotNum; // Slot number of the argument to be passed on stack
#if defined(UNIX_X86_ABI)
unsigned gtPadAlign; // Number of padding slots for stack alignment
#endif

#if FEATURE_FASTTAILCALL
bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
Expand All @@ -4561,6 +4564,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
Expand All @@ -4582,6 +4588,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
Expand All @@ -4603,6 +4612,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
Expand All @@ -4622,6 +4634,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
Expand All @@ -4640,6 +4655,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}

#if defined(UNIX_X86_ABI)
// Returns gtPadAlign, the number of padding slots for stack alignment attached
// to this stack argument. Zero means no adjustment is requested; a non-zero
// value is scaled by TARGET_POINTER_SIZE by the code generator before it is
// subtracted from the stack pointer.
unsigned getArgPadding()
{
    const unsigned padSlotCount = gtPadAlign;
    return padSlotCount;
}

// Records the number of padding slots for stack alignment on this stack
// argument node (stored in gtPadAlign).
//
// padAlign - padding expressed as a slot count, not bytes.
void setArgPadding(unsigned padAlign)
{
    const unsigned padSlotCount = padAlign;
    gtPadAlign                  = padSlotCount;
}
#endif

#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
Expand Down
25 changes: 25 additions & 0 deletions src/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5773,6 +5773,7 @@ void Compiler::lvaAlignFrame()

#elif defined(_TARGET_X86_)

#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
Expand All @@ -5783,6 +5784,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
#endif

if (STACK_ALIGN > REGSIZE_BYTES)
{
if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
{
// If we are not doing final layout, we don't know the exact value of compLclFrameSize
// and thus do not know how much we will need to add in order to be aligned.
// We add the maximum pad that we could ever have (which is 12)
lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
}

// The stack must always be 16 byte aligned.
int adjustFrameSize = compLclFrameSize;
#if defined(UNIX_X86_ABI)
// we need to consider spilled register(s) plus return address and/or EBP
int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
#endif
if ((adjustFrameSize % STACK_ALIGN) != 0)
{
lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
}
}

#else
NYI("TARGET specific lvaAlignFrame");
Expand Down
4 changes: 4 additions & 0 deletions src/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,10 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif

#if defined(UNIX_X86_ABI)
putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
#endif

#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
Expand Down
Loading

0 comments on commit 6f4f1de

Please sign in to comment.