Skip to content

Commit

Permalink
Fix poisoning for very large structs (#61521)
Browse files Browse the repository at this point in the history
For very large structs poisoning could end up generating instructions
requiring larger local var offsets than we can handle which would hit an
IMPL_LIMIT that throws InvalidProgramException. Switch to using rep
stosd (x86/x64)/memset helper (other platforms) when a local needs more
than a certain number of mov instructions to poison.

Also includes a register allocator change to mark killed registers as
modified in addRefsForPhysRegMask instead of by the (usually) calling
function, since this function is used directly in the change.
  • Loading branch information
jakobbotsch authored Nov 16, 2021
1 parent bd0c543 commit 9a9a4f3
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 44 deletions.
90 changes: 58 additions & 32 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12446,7 +12446,19 @@ void CodeGenInterface::VariableLiveKeeper::dumpLvaVariableLiveRanges() const
void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
{
assert(compiler->compShouldPoisonFrame());
assert((regLiveIn & genRegMask(REG_SCRATCH)) == 0);
#if defined(TARGET_XARCH)
regNumber poisonValReg = REG_EAX;
assert((regLiveIn & (RBM_EDI | RBM_ECX | RBM_EAX)) == 0);
#else
regNumber poisonValReg = REG_SCRATCH;
assert((regLiveIn & (genRegMask(REG_SCRATCH) | RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2)) == 0);
#endif

#ifdef TARGET_64BIT
const ssize_t poisonVal = (ssize_t)0xcdcdcdcdcdcdcdcd;
#else
const ssize_t poisonVal = (ssize_t)0xcdcdcdcd;
#endif

// The first time we need to poison something we will initialize a register to the largest immediate cccccccc that
// we can fit.
Expand All @@ -12461,49 +12473,63 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)

assert(varDsc->lvOnFrame);

int size = (int)compiler->lvaLclSize(varNum);

if (size / TARGET_POINTER_SIZE > 16)
unsigned int size = compiler->lvaLclSize(varNum);
if ((size / TARGET_POINTER_SIZE) > 16)
{
// For very large structs the offsets in the movs we emit below can
// grow too large to be handled properly by JIT. Furthermore, while
// this is only debug code, for very large structs this can bloat
// the code too much due to the singular movs used.
continue;
}

if (!hasPoisonImm)
{
#ifdef TARGET_64BIT
instGen_Set_Reg_To_Imm(EA_8BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcdcdcdcdcd);
// This will require more than 16 instructions, switch to rep stosd/memset call.
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_XARCH)
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0);
assert(size % 4 == 0);
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ECX, size / 4);
// On xarch we can leave the value in eax and only set eax once
// since rep stosd does not kill eax.
if (!hasPoisonImm)
{
instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_EAX, poisonVal);
hasPoisonImm = true;
}
instGen(INS_r_stosd);
#else
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcd);
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_2, size);
genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
// May kill REG_SCRATCH, so we need to reload it.
hasPoisonImm = false;
#endif
hasPoisonImm = true;
}
else
{
if (!hasPoisonImm)
{
instGen_Set_Reg_To_Imm(EA_PTRSIZE, poisonValReg, poisonVal);
hasPoisonImm = true;
}

// For 64-bit we check if the local is 8-byte aligned. For 32-bit, we assume everything is always 4-byte aligned.
#ifdef TARGET_64BIT
bool fpBased;
int addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
bool fpBased;
int addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
#else
int addr = 0;
int addr = 0;
#endif
int end = addr + size;
for (int offs = addr; offs < end;)
{
#ifdef TARGET_64BIT
if ((offs % 8) == 0 && end - offs >= 8)
int end = addr + (int)size;
for (int offs = addr; offs < end;)
{
GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
offs += 8;
continue;
}
#ifdef TARGET_64BIT
if ((offs % 8) == 0 && end - offs >= 8)
{
GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
offs += 8;
continue;
}
#endif

assert((offs % 4) == 0 && end - offs >= 4);
GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
offs += 4;
assert((offs % 4) == 0 && end - offs >= 4);
GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
offs += 4;
}
}
}
}
Expand Down
36 changes: 24 additions & 12 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,20 @@ bool LinearScan::isContainableMemoryOp(GenTree* node)
//
void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
{
if (refType == RefTypeKill)
{
// The mask identifies a set of registers that will be used during
// codegen. Mark these as modified here, so when we do final frame
// layout, we'll know about all these registers. This is especially
// important if mask contains callee-saved registers, which affect the
// frame size since we need to save/restore them. In the case where we
// have a copyBlk with GC pointers, can need to call the
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
// RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as
// modified until codegen, which is too late.
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true));
}

for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
{
if (mask & 1)
Expand Down Expand Up @@ -1137,16 +1151,6 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo

if (killMask != RBM_NONE)
{
// The killMask identifies a set of registers that will be used during codegen.
// Mark these as modified here, so when we do final frame layout, we'll know about
// all these registers. This is especially important if killMask contains
// callee-saved registers, which affect the frame size since we need to save/restore them.
// In the case where we have a copyBlk with GC pointers, can need to call the
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if
// LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
// which is too late.
compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));

addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);

// TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
Expand Down Expand Up @@ -2356,7 +2360,15 @@ void LinearScan::buildIntervals()
// into the scratch register, so it will be killed here.
if (compiler->compShouldPoisonFrame() && compiler->fgFirstBBisScratch() && block == compiler->fgFirstBB)
{
addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true);
regMaskTP killed;
#if defined(TARGET_XARCH)
// Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars.
killed = RBM_EDI | RBM_ECX | RBM_EAX;
#else
// Poisoning uses REG_SCRATCH for small vars and memset helper for big vars.
killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
#endif
addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true);
currentLoc += 2;
}

Expand Down Expand Up @@ -3291,7 +3303,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
defCandidates = allRegs(type);
}
#else
defCandidates = allRegs(type);
defCandidates = allRegs(type);
#endif // TARGET_X86

RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index);
Expand Down
21 changes: 21 additions & 0 deletions src/tests/JIT/Directed/debugging/poison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@ public static unsafe int Main()
WithoutGCRef poisoned2;
Unsafe.SkipInit(out poisoned2);
result &= VerifyPoison(&poisoned2, sizeof(WithoutGCRef));

Massive poisoned3;
Unsafe.SkipInit(out poisoned3);
result &= VerifyPoison(&poisoned3, sizeof(Massive));

WithoutGCRef poisoned4;
Unsafe.SkipInit(out poisoned4);
result &= VerifyPoison(&poisoned4, sizeof(WithoutGCRef));

Massive poisoned5;
Unsafe.SkipInit(out poisoned5);
result &= VerifyPoison(&poisoned5, sizeof(Massive));

GCRef zeroed2;
Unsafe.SkipInit(out zeroed2);
result &= VerifyZero(Unsafe.AsPointer(ref zeroed2), Unsafe.SizeOf<GCRef>());

return result ? 100 : 101;
}
Expand Down Expand Up @@ -53,4 +69,9 @@ private struct WithoutGCRef
public int ANumber;
public float AFloat;
}

private unsafe struct Massive
{
public fixed byte Bytes[0x10008];
}
}

0 comments on commit 9a9a4f3

Please sign in to comment.