From 5df45286968775ffccaf01669eba7c8baa9923e1 Mon Sep 17 00:00:00 2001 From: SaeHie Park Date: Mon, 9 Jan 2017 10:39:54 +0900 Subject: [PATCH] [x86/Linux] Stack align 16 bytes for JIT code Change JIT code to align stack in 16 byte used in modern compiler --- src/jit/codegencommon.cpp | 15 +++-- src/jit/codegenxarch.cpp | 43 +++++++++----- src/jit/compiler.h | 19 +++++++ src/jit/gentree.h | 27 +++++++++ src/jit/lclvars.cpp | 25 +++++++++ src/jit/lower.cpp | 5 ++ src/jit/morph.cpp | 115 +++++++++++++++++++++++++++++--------- src/jit/target.h | 6 ++ 8 files changed, 211 insertions(+), 44 deletions(-) diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 396ea77374b4..b1e474b75506 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -3165,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) /* Check our max stack level. Needed for fgAddCodeRef(). We need to relax the assert as our estimation won't include code-gen stack changes (which we know don't affect fgAddCodeRef()) */ - noway_assert(getEmitter()->emitMaxStackDepth <= - (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. - compiler->compHndBBtabCount + // Return address for locally-called finallys - genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc - (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args + { + unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. + compiler->compHndBBtabCount + // Return address for locally-called finallys + genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc + (compiler->compTailCallUsed ? 
4 : 0); // CORINFO_HELP_TAILCALL args +#if defined(UNIX_X86_ABI) + maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding #endif + noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth); + } +#endif // EMIT_TRACK_STACK_DEPTH *nativeSizeOfCode = codeSize; compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize; diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 7367dbb8f1e6..c88b9592eeaf 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2645,16 +2645,14 @@ void CodeGen::genLclHeap(GenTreePtr tree) // Loop: genDefineTempLabel(loop); -#if defined(_TARGET_AMD64_) - // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value. - static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); - inst_IV(INS_push_hide, 0); // --- push 8-byte 0 - inst_IV(INS_push_hide, 0); // --- push 8-byte 0 -#elif defined(_TARGET_X86_) - // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value. - static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES); - inst_IV(INS_push_hide, 0); // --- push 4-byte 0 -#endif // _TARGET_X86_ + static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0); + unsigned const count = (STACK_ALIGN / REGSIZE_BYTES); + + for (unsigned i = 0; i < count; i++) + { + inst_IV(INS_push_hide, 0); // --- push REG_SIZE bytes of 0 + } + // Note that the stack must always be aligned to STACK_ALIGN bytes // Decrement the loop counter and loop if not done. 
inst_RV(INS_dec, regCnt, TYP_I_IMPL); @@ -4894,9 +4892,9 @@ void CodeGen::genCallInstruction(GenTreePtr node) stackArgBytes += argBytes; } else - { #endif // FEATURE_PUT_STRUCT_ARG_STK + { stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); } } @@ -5135,6 +5133,15 @@ void CodeGen::genCallInstruction(GenTreePtr node) retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset); } +#if defined(UNIX_X86_ABI) + // Put back the stack pointer if there was any padding for stack alignment + unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign(); + if (padStackAlign != 0) + { + inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE); + } +#endif // UNIX_X86_ABI + // if it was a pinvoke we may have needed to get the address of a label if (genPendingCallLabel) { @@ -7753,6 +7760,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) #ifdef _TARGET_X86_ +#if defined(UNIX_X86_ABI) + // For each call, first stack argument has the padding for alignment + // if this value is not zero, use it to adjust the ESP + unsigned argPadding = putArgStk->getArgPadding(); + if (argPadding != 0) + { + inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE); + } +#endif + if (varTypeIsStruct(targetType)) { (void)genAdjustStackForPutArgStk(putArgStk); @@ -8070,7 +8087,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) slotAttr = EA_BYREF; } - const unsigned offset = i * 4; + const unsigned offset = i * TARGET_POINTER_SIZE; if (srcAddrInReg) { getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset); @@ -8079,7 +8096,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) { getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset); } - genStackLevel += 4; + genStackLevel += TARGET_POINTER_SIZE; } #else // !defined(_TARGET_X86_) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 167d8090b5fa..6f57813417ac 100644 --- a/src/jit/compiler.h +++ 
b/src/jit/compiler.h @@ -1186,6 +1186,11 @@ struct fgArgTabEntry unsigned alignment; // 1 or 2 (slots/registers) unsigned lateArgInx; // index into gtCallLateArgs list unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg +#if defined(UNIX_X86_ABI) + unsigned padStkAlign; // Count of number of padding slots for stack alignment. For each Call, only the first + // argument may have a value to emit "sub esp, n" to adjust the stack before pushing + // the argument. +#endif bool isSplit : 1; // True when this argument is split between the registers and OutArg area bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar @@ -1263,6 +1268,10 @@ class fgArgInfo unsigned argCount; // Updatable arg count value unsigned nextSlotNum; // Updatable slot count value unsigned stkLevel; // Stack depth when we make this call (for x86) +#if defined(UNIX_X86_ABI) + unsigned padStkAlign; // Count of number of padding slots for stack alignment. This value is used to turn back + // stack pointer before it was adjusted after each Call +#endif unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs) bool hasRegArgs; // true if we have one or more register arguments @@ -1312,6 +1321,10 @@ class fgArgInfo void ArgsComplete(); +#if defined(UNIX_X86_ABI) + void ArgsAlignPadding(); +#endif + void SortArgs(); void EvalArgsToTemps(); @@ -1331,6 +1344,12 @@ class fgArgInfo { return nextSlotNum; } +#if defined(UNIX_X86_ABI) + unsigned GetPadStackAlign() + { + return padStkAlign; + } +#endif bool HasRegArgs() { return hasRegArgs; diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 10ba4b09a599..da61debf2720 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -4546,6 +4546,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon struct GenTreePutArgStk : public GenTreeUnOp { unsigned gtSlotNum; // Slot number of the argument to be passed on stack +#if defined(UNIX_X86_ABI) + unsigned gtPadAlign; 
// Number of padding slots for stack alignment +#endif #if FEATURE_FASTTAILCALL bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area. @@ -4561,6 +4564,9 @@ struct GenTreePutArgStk : public GenTreeUnOp DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type DEBUGARG(largeNode)) , gtSlotNum(slotNum) +#if defined(UNIX_X86_ABI) + , gtPadAlign(0) +#endif , putInIncomingArgArea(_putInIncomingArgArea) #ifdef FEATURE_PUT_STRUCT_ARG_STK , gtPutArgStkKind(Kind::Invalid) @@ -4582,6 +4588,9 @@ struct GenTreePutArgStk : public GenTreeUnOp DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)) , gtSlotNum(slotNum) +#if defined(UNIX_X86_ABI) + , gtPadAlign(0) +#endif , putInIncomingArgArea(_putInIncomingArgArea) #ifdef FEATURE_PUT_STRUCT_ARG_STK , gtPutArgStkKind(Kind::Invalid) @@ -4603,6 +4612,9 @@ struct GenTreePutArgStk : public GenTreeUnOp DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type DEBUGARG(largeNode)) , gtSlotNum(slotNum) +#if defined(UNIX_X86_ABI) + , gtPadAlign(0) +#endif #ifdef FEATURE_PUT_STRUCT_ARG_STK , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) @@ -4622,6 +4634,9 @@ struct GenTreePutArgStk : public GenTreeUnOp DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)) , gtSlotNum(slotNum) +#if defined(UNIX_X86_ABI) + , gtPadAlign(0) +#endif #ifdef FEATURE_PUT_STRUCT_ARG_STK , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) @@ -4640,6 +4655,18 @@ struct GenTreePutArgStk : public GenTreeUnOp return gtSlotNum * TARGET_POINTER_SIZE; } +#if defined(UNIX_X86_ABI) + unsigned getArgPadding() + { + return gtPadAlign; + } + + void setArgPadding(unsigned padAlign) + { + gtPadAlign = padAlign; + } +#endif + #ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned getArgSize() { diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 3af0813fa7c2..cef861e64191 100644 --- a/src/jit/lclvars.cpp 
+++ b/src/jit/lclvars.cpp @@ -5773,6 +5773,7 @@ void Compiler::lvaAlignFrame() #elif defined(_TARGET_X86_) +#if DOUBLE_ALIGN if (genDoubleAlign()) { // Double Frame Alignement for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals() @@ -5783,6 +5784,30 @@ void Compiler::lvaAlignFrame() lvaIncrementFrameSize(sizeof(void*)); } } +#endif + + if (STACK_ALIGN > REGSIZE_BYTES) + { + if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) + { + // If we are not doing final layout, we don't know the exact value of compLclFrameSize + // and thus do not know how much we will need to add in order to be aligned. + // We add the maximum pad that we could ever have (which is 12) + lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES); + } + + // Align the stack with STACK_ALIGN value. + int adjustFrameSize = compLclFrameSize; +#if defined(UNIX_X86_ABI) + // we need to consider spilled register(s) plus return address and/or EBP + int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0); + adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN; +#endif + if ((adjustFrameSize % STACK_ALIGN) != 0) + { + lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN)); + } + } #else NYI("TARGET specific lvaAlignFrame"); diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 1ac4ef4cbf8f..ce383e22a6c7 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -943,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call)); #endif +#if defined(UNIX_X86_ABI) + assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0)); + putArg->AsPutArgStk()->setArgPadding(info->padStkAlign); +#endif + #ifdef FEATURE_PUT_STRUCT_ARG_STK // If the ArgTabEntry indicates that this arg is a struct // get and store the number of slots that are references. 
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 99ef15a3b87d..17eb755b2ccc 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs) compiler = comp; callTree = call; assert(call->IsCall()); - argCount = 0; // filled in arg count, starts at zero - nextSlotNum = INIT_ARG_STACK_SLOT; - stkLevel = 0; + argCount = 0; // filled in arg count, starts at zero + nextSlotNum = INIT_ARG_STACK_SLOT; + stkLevel = 0; +#if defined(UNIX_X86_ABI) + padStkAlign = 0; +#endif argTableSize = numArgs; // the allocated table size hasRegArgs = false; @@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall) ; callTree = newCall; assert(newCall->IsCall()); - argCount = 0; // filled in arg count, starts at zero - nextSlotNum = INIT_ARG_STACK_SLOT; - stkLevel = oldArgInfo->stkLevel; + argCount = 0; // filled in arg count, starts at zero + nextSlotNum = INIT_ARG_STACK_SLOT; + stkLevel = oldArgInfo->stkLevel; +#if defined(UNIX_X86_ABI) + padStkAlign = oldArgInfo->padStkAlign; +#endif argTableSize = oldArgInfo->argTableSize; argsComplete = false; argTable = nullptr; @@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg( { fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = regNum; - curArgTabEntry->slotNum = 0; - curArgTabEntry->numRegs = numRegs; - curArgTabEntry->numSlots = 0; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = regNum; + curArgTabEntry->slotNum = 0; + curArgTabEntry->numRegs = numRegs; + curArgTabEntry->numSlots = 0; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = 
(unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; +#if defined(UNIX_X86_ABI) + curArgTabEntry->padStkAlign = 0; +#endif curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->isStruct = isStruct; // is this a struct arg #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = REG_STK; - curArgTabEntry->slotNum = nextSlotNum; - curArgTabEntry->numRegs = 0; - curArgTabEntry->numSlots = numSlots; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = REG_STK; + curArgTabEntry->slotNum = nextSlotNum; + curArgTabEntry->numRegs = 0; + curArgTabEntry->numSlots = numSlots; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; +#if defined(UNIX_X86_ABI) + curArgTabEntry->padStkAlign = 0; +#endif curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1689,6 +1701,52 @@ void fgArgInfo::ArgsComplete() argsComplete = true; } +#if defined(UNIX_X86_ABI) +// Get the stack alignment value for a Call holding this object +// +// NOTE: This function will calculate number of padding slots, to align the +// stack before pushing arguments to the stack. Padding value is stored in +// the first argument in fgArgTabEntry structure padStkAlign member so that +// code (sub esp, n) can be emitted before generating argument push in +// fgArgTabEntry node. As a result, the stack will be aligned right before +// making a "Call".
After the Call, stack is re-adjusted to the value it +// was with fgArgInfo->padStkAlign value as we can't use the one in fgArgTabEntry. +// +void fgArgInfo::ArgsAlignPadding() +{ + // To get the padding amount, sum up all the slots and get the remainder for padding + unsigned curInx; + unsigned numSlots = 0; + fgArgTabEntryPtr firstArgTabEntry = nullptr; + + for (curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; + if (curArgTabEntry->numSlots > 0) + { + // The argument may be REG_STK or constant or register that goes to stack + assert(nextSlotNum >= curArgTabEntry->slotNum); + + numSlots += curArgTabEntry->numSlots; + if (firstArgTabEntry == nullptr) + { + // First argument will be used to hold the padding amount + firstArgTabEntry = curArgTabEntry; + } + } + } + + if (firstArgTabEntry != nullptr) + { + const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE; + // Set stack align pad for the first argument + firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned); + // Set also for fgArgInfo that will be used to reset stack pointer after the Call + this->padStkAlign = firstArgTabEntry->padStkAlign; + } +} +#endif // UNIX_X86_ABI + void fgArgInfo::SortArgs() { assert(argsComplete == true); @@ -4211,6 +4269,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (!reMorphing) { call->fgArgInfo->ArgsComplete(); + +#if defined(UNIX_X86_ABI) + call->fgArgInfo->ArgsAlignPadding(); +#endif // UNIX_X86_ABI + #ifdef LEGACY_BACKEND call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum); #if defined(_TARGET_ARM_) diff --git a/src/jit/target.h b/src/jit/target.h index 6330d52889e3..5b608ddfac9e 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -495,9 +495,15 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
#define CODE_ALIGN 1 // code alignment requirement +#if !defined(UNIX_X86_ABI) #define STACK_ALIGN 4 // stack alignment requirement #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs #define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units +#else + #define STACK_ALIGN 16 // stack alignment requirement + #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert stack size in bytes to size in 16-byte (STACK_ALIGN) units + #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units +#endif // !UNIX_X86_ABI #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI) #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)