Skip to content

Commit 9a9a4f3

Browse files
authored
Fix poisoning for very large structs (#61521)
For very large structs, poisoning could end up generating instructions that require larger local variable offsets than the JIT can handle, which would hit an IMPL_LIMIT that throws InvalidProgramException. Switch to using rep stosd (x86/x64) or the memset helper (other platforms) when a local needs more than a certain number of mov instructions to poison. This also includes a register allocator change to mark killed registers as modified inside addRefsForPhysRegMask rather than in its (usual) calling function, since addRefsForPhysRegMask is used directly in this change.
1 parent bd0c543 commit 9a9a4f3

File tree

3 files changed

+103
-44
lines changed

3 files changed

+103
-44
lines changed

src/coreclr/jit/codegencommon.cpp

+58-32
Original file line numberDiff line numberDiff line change
@@ -12446,7 +12446,19 @@ void CodeGenInterface::VariableLiveKeeper::dumpLvaVariableLiveRanges() const
1244612446
void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
1244712447
{
1244812448
assert(compiler->compShouldPoisonFrame());
12449-
assert((regLiveIn & genRegMask(REG_SCRATCH)) == 0);
12449+
#if defined(TARGET_XARCH)
12450+
regNumber poisonValReg = REG_EAX;
12451+
assert((regLiveIn & (RBM_EDI | RBM_ECX | RBM_EAX)) == 0);
12452+
#else
12453+
regNumber poisonValReg = REG_SCRATCH;
12454+
assert((regLiveIn & (genRegMask(REG_SCRATCH) | RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2)) == 0);
12455+
#endif
12456+
12457+
#ifdef TARGET_64BIT
12458+
const ssize_t poisonVal = (ssize_t)0xcdcdcdcdcdcdcdcd;
12459+
#else
12460+
const ssize_t poisonVal = (ssize_t)0xcdcdcdcd;
12461+
#endif
1245012462

1245112463
// The first time we need to poison something we will initialize a register to the largest immediate cdcdcdcd that
1245212464
// we can fit.
@@ -12461,49 +12473,63 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
1246112473

1246212474
assert(varDsc->lvOnFrame);
1246312475

12464-
int size = (int)compiler->lvaLclSize(varNum);
12465-
12466-
if (size / TARGET_POINTER_SIZE > 16)
12476+
unsigned int size = compiler->lvaLclSize(varNum);
12477+
if ((size / TARGET_POINTER_SIZE) > 16)
1246712478
{
12468-
// For very large structs the offsets in the movs we emit below can
12469-
// grow too large to be handled properly by JIT. Furthermore, while
12470-
// this is only debug code, for very large structs this can bloat
12471-
// the code too much due to the singular movs used.
12472-
continue;
12473-
}
12474-
12475-
if (!hasPoisonImm)
12476-
{
12477-
#ifdef TARGET_64BIT
12478-
instGen_Set_Reg_To_Imm(EA_8BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcdcdcdcdcd);
12479+
// This will require more than 16 instructions, switch to rep stosd/memset call.
12480+
CLANG_FORMAT_COMMENT_ANCHOR;
12481+
#if defined(TARGET_XARCH)
12482+
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0);
12483+
assert(size % 4 == 0);
12484+
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ECX, size / 4);
12485+
// On xarch we can leave the value in eax and only set eax once
12486+
// since rep stosd does not kill eax.
12487+
if (!hasPoisonImm)
12488+
{
12489+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_EAX, poisonVal);
12490+
hasPoisonImm = true;
12491+
}
12492+
instGen(INS_r_stosd);
1247912493
#else
12480-
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcd);
12494+
GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
12495+
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
12496+
instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_2, size);
12497+
genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
12498+
// May kill REG_SCRATCH, so we need to reload it.
12499+
hasPoisonImm = false;
1248112500
#endif
12482-
hasPoisonImm = true;
1248312501
}
12502+
else
12503+
{
12504+
if (!hasPoisonImm)
12505+
{
12506+
instGen_Set_Reg_To_Imm(EA_PTRSIZE, poisonValReg, poisonVal);
12507+
hasPoisonImm = true;
12508+
}
1248412509

1248512510
// For 64-bit we check if the local is 8-byte aligned. For 32-bit, we assume everything is always 4-byte aligned.
1248612511
#ifdef TARGET_64BIT
12487-
bool fpBased;
12488-
int addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
12512+
bool fpBased;
12513+
int addr = compiler->lvaFrameAddress((int)varNum, &fpBased);
1248912514
#else
12490-
int addr = 0;
12515+
int addr = 0;
1249112516
#endif
12492-
int end = addr + size;
12493-
for (int offs = addr; offs < end;)
12494-
{
12495-
#ifdef TARGET_64BIT
12496-
if ((offs % 8) == 0 && end - offs >= 8)
12517+
int end = addr + (int)size;
12518+
for (int offs = addr; offs < end;)
1249712519
{
12498-
GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
12499-
offs += 8;
12500-
continue;
12501-
}
12520+
#ifdef TARGET_64BIT
12521+
if ((offs % 8) == 0 && end - offs >= 8)
12522+
{
12523+
GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr);
12524+
offs += 8;
12525+
continue;
12526+
}
1250212527
#endif
1250312528

12504-
assert((offs % 4) == 0 && end - offs >= 4);
12505-
GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
12506-
offs += 4;
12529+
assert((offs % 4) == 0 && end - offs >= 4);
12530+
GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr);
12531+
offs += 4;
12532+
}
1250712533
}
1250812534
}
1250912535
}

src/coreclr/jit/lsrabuild.cpp

+24-12
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,20 @@ bool LinearScan::isContainableMemoryOp(GenTree* node)
712712
//
713713
void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
714714
{
715+
if (refType == RefTypeKill)
716+
{
717+
// The mask identifies a set of registers that will be used during
718+
// codegen. Mark these as modified here, so when we do final frame
719+
// layout, we'll know about all these registers. This is especially
720+
// important if mask contains callee-saved registers, which affect the
721+
// frame size since we need to save/restore them. In the case where we
722+
// have a copyBlk with GC pointers, we may need to call the
723+
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and
724+
// RDI. If LSRA doesn't assign RSI/RDI, they wouldn't get marked as
725+
// modified until codegen, which is too late.
726+
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true));
727+
}
728+
715729
for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
716730
{
717731
if (mask & 1)
@@ -1137,16 +1151,6 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
11371151

11381152
if (killMask != RBM_NONE)
11391153
{
1140-
// The killMask identifies a set of registers that will be used during codegen.
1141-
// Mark these as modified here, so when we do final frame layout, we'll know about
1142-
// all these registers. This is especially important if killMask contains
1143-
// callee-saved registers, which affect the frame size since we need to save/restore them.
1144-
// In the case where we have a copyBlk with GC pointers, can need to call the
1145-
// CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if
1146-
// LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
1147-
// which is too late.
1148-
compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true));
1149-
11501154
addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
11511155

11521156
// TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
@@ -2356,7 +2360,15 @@ void LinearScan::buildIntervals()
23562360
// into the scratch register, so it will be killed here.
23572361
if (compiler->compShouldPoisonFrame() && compiler->fgFirstBBisScratch() && block == compiler->fgFirstBB)
23582362
{
2359-
addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true);
2363+
regMaskTP killed;
2364+
#if defined(TARGET_XARCH)
2365+
// Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars.
2366+
killed = RBM_EDI | RBM_ECX | RBM_EAX;
2367+
#else
2368+
// Poisoning uses REG_SCRATCH for small vars and memset helper for big vars.
2369+
killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
2370+
#endif
2371+
addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true);
23602372
currentLoc += 2;
23612373
}
23622374

@@ -3291,7 +3303,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
32913303
defCandidates = allRegs(type);
32923304
}
32933305
#else
3294-
defCandidates = allRegs(type);
3306+
defCandidates = allRegs(type);
32953307
#endif // TARGET_X86
32963308

32973309
RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index);

src/tests/JIT/Directed/debugging/poison.cs

+21
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,22 @@ public static unsafe int Main()
1919
WithoutGCRef poisoned2;
2020
Unsafe.SkipInit(out poisoned2);
2121
result &= VerifyPoison(&poisoned2, sizeof(WithoutGCRef));
22+
23+
Massive poisoned3;
24+
Unsafe.SkipInit(out poisoned3);
25+
result &= VerifyPoison(&poisoned3, sizeof(Massive));
26+
27+
WithoutGCRef poisoned4;
28+
Unsafe.SkipInit(out poisoned4);
29+
result &= VerifyPoison(&poisoned4, sizeof(WithoutGCRef));
30+
31+
Massive poisoned5;
32+
Unsafe.SkipInit(out poisoned5);
33+
result &= VerifyPoison(&poisoned5, sizeof(Massive));
34+
35+
GCRef zeroed2;
36+
Unsafe.SkipInit(out zeroed2);
37+
result &= VerifyZero(Unsafe.AsPointer(ref zeroed2), Unsafe.SizeOf<GCRef>());
2238

2339
return result ? 100 : 101;
2440
}
@@ -53,4 +69,9 @@ private struct WithoutGCRef
5369
public int ANumber;
5470
public float AFloat;
5571
}
72+
73+
private unsafe struct Massive
74+
{
75+
public fixed byte Bytes[0x10008];
76+
}
5677
}

0 commit comments

Comments
 (0)