From 27a02d33bfda0ec8f84c17a502b033a05bf3a0bb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 12 Nov 2021 19:35:49 +0100 Subject: [PATCH 1/2] Fix poisoning for very large structs For very large structs poisoning could end up generating instructions requiring larger local var offsets than we can handle which would hit an IMPL_LIMIT that throws InvalidProgramException. Switch to using rep stosd (x86/x64)/memset helper (other platforms) when a local needs more than a certain number of mov instructions to poison. Also includes a register allocator change to mark killed registers as modified in addRefsForPhysRegMask instead of by the (usually) calling function, since this function is used directly in the change. Fix #60852 --- src/coreclr/jit/codegencommon.cpp | 88 ++++++++++++++-------- src/coreclr/jit/lsrabuild.cpp | 36 ++++++--- src/tests/JIT/Directed/debugging/poison.cs | 21 ++++++ 3 files changed, 102 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8585bab6bc0bd..378bc3d6f683a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -12446,7 +12446,19 @@ void CodeGenInterface::VariableLiveKeeper::dumpLvaVariableLiveRanges() const void CodeGen::genPoisonFrame(regMaskTP regLiveIn) { assert(compiler->compShouldPoisonFrame()); - assert((regLiveIn & genRegMask(REG_SCRATCH)) == 0); +#if defined(TARGET_XARCH) + regNumber poisonValReg = REG_EAX; + assert((regLiveIn & (RBM_EDI | RBM_ECX | RBM_EAX)) == 0); +#else + regNumber poisonValReg = REG_SCRATCH; + assert((regLiveIn & (genRegMask(REG_SCRATCH) | RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2)) == 0); +#endif + +#ifdef TARGET_64BIT + const ssize_t poisonVal = (ssize_t)0xcdcdcdcdcdcdcdcd; +#else + const ssize_t poisonVal = (ssize_t)0xcdcdcdcd; +#endif // The first time we need to poison something we will initialize a register to the largest immediate cccccccc that // we can fit. @@ -12461,49 +12473,63 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) assert(varDsc->lvOnFrame); - int size = (int)compiler->lvaLclSize(varNum); - + unsigned int size = compiler->lvaLclSize(varNum); if (size / TARGET_POINTER_SIZE > 16) { - // For very large structs the offsets in the movs we emit below can - // grow too large to be handled properly by JIT. Furthermore, while - // this is only debug code, for very large structs this can bloat - // the code too much due to the singular movs used. - continue; - } - - if (!hasPoisonImm) - { -#ifdef TARGET_64BIT - instGen_Set_Reg_To_Imm(EA_8BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcdcdcdcdcd); + // This will require more than 16 instructions, switch to rep stosd/memset call. + CLANG_FORMAT_COMMENT_ANCHOR; +#if defined(TARGET_XARCH) + GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_EDI, (int)varNum, 0); + assert(size % 4 == 0); + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ECX, size / 4); + // On xarch we can leave the value in eax and only set eax once + // since rep stosd does not kill eax. + if (!hasPoisonImm) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_EAX, poisonVal); + hasPoisonImm = true; + } + instGen(INS_r_stosd); #else - instGen_Set_Reg_To_Imm(EA_4BYTE, REG_SCRATCH, (ssize_t)0xcdcdcdcd); + GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0); + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast(poisonVal)); + instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_2, size); + genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); + // May kill REG_SCRATCH, so we need to reload it. + hasPoisonImm = false; #endif - hasPoisonImm = true; } + else + { + if (!hasPoisonImm) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, poisonValReg, poisonVal); + hasPoisonImm = true; + } // For 64-bit we check if the local is 8-byte aligned. For 32-bit, we assume everything is always 4-byte aligned. #ifdef TARGET_64BIT - bool fpBased; - int addr = compiler->lvaFrameAddress((int)varNum, &fpBased); + bool fpBased; + int addr = compiler->lvaFrameAddress((int)varNum, &fpBased); #else - int addr = 0; + int addr = 0; #endif - int end = addr + size; - for (int offs = addr; offs < end;) - { -#ifdef TARGET_64BIT - if ((offs % 8) == 0 && end - offs >= 8) + int end = addr + (int)size; + for (int offs = addr; offs < end;) { - GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr); - offs += 8; - continue; - } +#ifdef TARGET_64BIT + if ((offs % 8) == 0 && end - offs >= 8) + { + GetEmitter()->emitIns_S_R(ins_Store(TYP_LONG), EA_8BYTE, REG_SCRATCH, (int)varNum, offs - addr); + offs += 8; + continue; + } #endif - assert((offs % 4) == 0 && end - offs >= 4); - GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr); - offs += 4; + assert((offs % 4) == 0 && end - offs >= 4); + GetEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, REG_SCRATCH, (int)varNum, offs - addr); + offs += 4; + } } } } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 88a3aebdd8e68..c286a71406938 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -712,6 +712,20 @@ bool LinearScan::isContainableMemoryOp(GenTree* node) // void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse) { + if (refType == RefTypeKill) + { + // The mask identifies a set of registers that will be used during + // codegen. Mark these as modified here, so when we do final frame + // layout, we'll know about all these registers. This is especially + // important if mask contains callee-saved registers, which affect the + // frame size since we need to save/restore them. In the case where we + // have a copyBlk with GC pointers, can need to call the + // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and + // RDI, if LSRA doesn't assign RSI/RDI, they wouldn't get marked as + // modified until codegen, which is too late. + compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); + } + for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1) { if (mask & 1) @@ -1137,16 +1151,6 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (killMask != RBM_NONE) { - // The killMask identifies a set of registers that will be used during codegen. - // Mark these as modified here, so when we do final frame layout, we'll know about - // all these registers. This is especially important if killMask contains - // callee-saved registers, which affect the frame size since we need to save/restore them. - // In the case where we have a copyBlk with GC pointers, can need to call the - // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI, if - // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen, - // which is too late. - compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(true)); - addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true); // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee @@ -2356,7 +2360,15 @@ void LinearScan::buildIntervals() // into the scratch register, so it will be killed here. if (compiler->compShouldPoisonFrame() && compiler->fgFirstBBisScratch() && block == compiler->fgFirstBB) { - addRefsForPhysRegMask(genRegMask(REG_SCRATCH), currentLoc + 1, RefTypeKill, true); + regMaskTP killed; +#if defined(TARGET_XARCH) + // Poisoning uses EAX for small vars and rep stosd that kills edi, ecx and eax for large vars. + killed = RBM_EDI | RBM_ECX | RBM_EAX; +#else + // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars. + killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); +#endif + addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); currentLoc += 2; } @@ -3291,7 +3303,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, defCandidates = allRegs(type); } #else - defCandidates = allRegs(type); + defCandidates = allRegs(type); #endif // TARGET_X86 RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index); diff --git a/src/tests/JIT/Directed/debugging/poison.cs b/src/tests/JIT/Directed/debugging/poison.cs index 463894a4e6a35..9cb366119ccbd 100644 --- a/src/tests/JIT/Directed/debugging/poison.cs +++ b/src/tests/JIT/Directed/debugging/poison.cs @@ -19,6 +19,22 @@ public static unsafe int Main() WithoutGCRef poisoned2; Unsafe.SkipInit(out poisoned2); result &= VerifyPoison(&poisoned2, sizeof(WithoutGCRef)); + + Massive poisoned3; + Unsafe.SkipInit(out poisoned3); + result &= VerifyPoison(&poisoned3, sizeof(Massive)); + + WithoutGCRef poisoned4; + Unsafe.SkipInit(out poisoned4); + result &= VerifyPoison(&poisoned4, sizeof(WithoutGCRef)); + + Massive poisoned5; + Unsafe.SkipInit(out poisoned5); + result &= VerifyPoison(&poisoned5, sizeof(Massive)); + + GCRef zeroed2; + Unsafe.SkipInit(out zeroed2); + result &= VerifyZero(Unsafe.AsPointer(ref zeroed2), Unsafe.SizeOf()); return result ? 100 : 101; } @@ -53,4 +69,9 @@ private struct WithoutGCRef public int ANumber; public float AFloat; } + + private unsafe struct Massive + { + public fixed byte Bytes[0x10008]; + } } From 3878fe796416140a84bc8fb9d2498fcd56b82cc3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 16 Nov 2021 10:18:18 +0100 Subject: [PATCH 2/2] Update src/coreclr/jit/codegencommon.cpp --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 378bc3d6f683a..ba734ed7302fe 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -12474,7 +12474,7 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn) assert(varDsc->lvOnFrame); unsigned int size = compiler->lvaLclSize(varNum); - if (size / TARGET_POINTER_SIZE > 16) + if ((size / TARGET_POINTER_SIZE) > 16) { // This will require more than 16 instructions, switch to rep stosd/memset call. CLANG_FORMAT_COMMENT_ANCHOR;