From 3eaea5d1ead8886a5fc1b070c4eac335feda8daf Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 10 Oct 2022 18:47:31 -0700 Subject: [PATCH 01/14] Added INS_sub_hide --- src/coreclr/jit/codegenxarch.cpp | 7 +------ src/coreclr/jit/instrsxarch.h | 3 +++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 6d632942647d6..35826ac559e7f 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2266,12 +2266,7 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm { // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment // to ESP. So do the work in the count register. - // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require - // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't - // track". - inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false); - inst_RV_IV(INS_sub, regTmp, (target_ssize_t)-spDelta, EA_PTRSIZE); - inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false); + inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } else #endif // TARGET_X86 diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index e766df67304d8..a46a863038af7 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -73,6 +73,9 @@ INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | Reads_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) +// Does not affect the stack tracking in the emitter +INST4(sub_hide, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) + INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, Writes_OF | Writes_SF | Writes_ZF | Writes_AF | Writes_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_FLAGS_Has_Wbit ) INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, Resets_OF | Writes_SF | Writes_ZF | Undefined_AF | Writes_PF | Resets_CF | INS_FLAGS_Has_Wbit ) From 63f940e1fa81606a5075220ad7b23558648037eb Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 10 Oct 2022 18:53:28 -0700 Subject: [PATCH 02/14] Update comment --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 35826ac559e7f..0828a15a48210 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2265,7 +2265,7 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm if (regTmp != REG_NA) { // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment - // to ESP. So do the work in the count register. + // to ESP. inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } else From 9dae74a8f0c7d04e925a909a7f940188b5b7b8b7 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 10 Oct 2022 19:12:14 -0700 Subject: [PATCH 03/14] Update x86 emitter to handle INS_sub_hide --- src/coreclr/jit/emitxarch.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1ef3510a75260..dfa5ccc07b550 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -11248,7 +11248,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) case IF_RRW_ARD: // Mark the destination register as holding a GCT_BYREF - assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); + assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)); emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); break; @@ -11265,7 +11265,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) case IF_ARW_RRD: case IF_ARW_CNS: - assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); + assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)); break; default: @@ -11695,7 +11695,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // reg could have been a GCREF as GCREF + int=BYREF // or BYREF+/-int=BYREF - assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub)); + assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide)); emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); break; @@ -12155,7 +12155,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) case IF_RRW_MRD: assert(id->idGCref() == GCT_BYREF); - assert(ins == INS_add || ins == INS_sub); + assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide); // Mark it as holding a GCT_BYREF emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); @@ -12720,7 +12720,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) break; case INS_add: - case INS_sub: + case INS_sub + case INS_sub_hide: assert(id->idGCref() == GCT_BYREF); #if 0 @@ -12743,7 +12744,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // r1/r2 could have been a GCREF as GCREF + int=BYREF // or BYREF+/-int=BYREF assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) || - ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub))); + ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub || ins == INS_sub_hide))); #endif // DEBUG #endif // 0 @@ -13221,7 +13222,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) } if (emitThisByrefRegs & regMask) { - assert(ins == INS_add || ins == INS_sub); + assert(ins == INS_add || ins == INS_sub || ins == INS_sub_hide); } #endif // Mark it as holding a GCT_BYREF @@ -14844,8 +14845,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case INS_sub: + case INS_sub_hide: // Check for "sub ESP, icon" - if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) + if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) { assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / TARGET_POINTER_SIZE)); @@ -14854,7 +14856,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case INS_add: // Check for "add ESP, icon" - if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) + if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) { assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL); emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0, @@ -15334,6 +15336,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_add: case INS_sub: + case INS_sub_hide: case INS_and: case INS_or: case INS_xor: From eeb674b9b19d0c65c290529ac9f501ba26ade756 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Tue, 11 Oct 2022 13:11:01 -0700 Subject: [PATCH 04/14] Update emitxarch.cpp --- src/coreclr/jit/emitxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index dfa5ccc07b550..84facea7786db 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -12720,7 +12720,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) break; case INS_add: - case INS_sub + case INS_sub: case INS_sub_hide: assert(id->idGCref() == GCT_BYREF); From 82adddb9ebf3f97526122c4bd66e22696491a42c Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 14 Oct 2022 10:02:54 -0700 Subject: [PATCH 05/14] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 0828a15a48210..0cf7755bcc30b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2264,8 +2264,7 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm #ifdef TARGET_X86 if (regTmp != REG_NA) { - // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment - // to ESP. + // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment to ESP. inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } else From 07a34f8b6fa41fe67bff5b777c49ca30fc005fbc Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 24 Oct 2022 12:03:21 -0700 Subject: [PATCH 06/14] Fixed tracking --- src/coreclr/jit/emitxarch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 0ae0b3b740964..bf3ffe7aa36aa 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -14833,7 +14833,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case INS_sub: - case INS_sub_hide: // Check for "sub ESP, icon" if (id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP) { From 7d67b0fabfc633b6bdee2cddc22018f4e342ef07 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 24 Oct 2022 12:34:29 -0700 Subject: [PATCH 07/14] Removed 'regTmp' as a parameter --- src/coreclr/jit/codegen.h | 11 ++++-- src/coreclr/jit/codegenxarch.cpp | 65 ++++++++++++++------------------ 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 641aeaaef55f1..bc49a44e7bd27 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1430,13 +1430,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genReturn(GenTree* treeNode); +#ifdef TARGET_XARCH + void genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack); + void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack); + target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack); + void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta); +#else // !TARGET_XARCH void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp); void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp); target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp); - -#if defined(TARGET_XARCH) - void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp); -#endif // defined(TARGET_XARCH) +#endif // !TARGET_XARCH void genLclHeap(GenTree* tree); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index ade25562b79aa..1979f6f16b51c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2216,13 +2216,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // // Arguments: // spDelta - the value to add to SP. Must be negative or zero. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. +// canTrack - x86 only: whether or not to track the SP adjustment // // Return Value: // None. // -void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp) +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack) { assert(spDelta < 0); @@ -2230,16 +2229,19 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // function that does a probe, which will in turn call this function. assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); -#ifdef TARGET_X86 - if (regTmp != REG_NA) +#ifdef TARGET_AMD64 + // We always track the SP adjustment on X64. + canTrack = true; +#endif // TARGET_AMD64 + + if (canTrack) { - // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment to ESP. - inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); + inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } else -#endif // TARGET_X86 { - inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); + // For x86, some cases don't want to track the adjustment to SP. + inst_RV_IV(INS_sub_hide, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } } @@ -2251,16 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTm // Arguments: // spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, // but the stack pointer doesn't move. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. +// canTrack - x86 only: whether or not to track the SP adjustment // // Return Value: // None. // -void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp) +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack) { GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - genStackPointerConstantAdjustment(spDelta, regTmp); + genStackPointerConstantAdjustment(spDelta, canTrack); } //------------------------------------------------------------------------ @@ -2274,13 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNum // // Arguments: // spDelta - the value to add to SP. Must be negative. -// regTmp - x86 only: an available temporary register. If not REG_NA, hide the SP -// adjustment from the emitter, using this register. +// canTrack - x86 only: whether or not to track the SP adjustment // // Return Value: // Offset in bytes from SP to last probed address. // -target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp) +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack) { assert(spDelta < 0); @@ -2290,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s do { ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); - genStackPointerConstantAdjustmentWithProbe(spOneDelta, regTmp); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, canTrack); spRemainingDelta -= spOneDelta; } while (spRemainingDelta < 0); @@ -2317,21 +2317,18 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s // genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer, // and probe the stack as appropriate. // -// Note that for x86, we hide the ESP adjustment from the emitter. To do that, currently, -// requires a temporary register and extra code. +// We hide the ESP adjustment from the emitter. // // Arguments: // regSpDelta - the register value to add to SP. The value in this register must be negative. // This register might be trashed. -// regTmp - an available temporary register. Will be trashed. // // Return Value: // None. // -void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp) +void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta) { assert(regSpDelta != REG_NA); - assert(regTmp != REG_NA); // Tickle the pages to ensure that ESP is always valid and is // in sync with the "stack guard page". Note that in the worst @@ -2350,9 +2347,7 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re // xor regSpDelta, regSpDelta // Overflow, pick lowest possible number // loop: // test ESP, [ESP+0] // tickle the page - // mov regTmp, ESP - // sub regTmp, eeGetPageSize() - // mov ESP, regTmp + // sub ESP, eeGetPageSize() // cmp ESP, regSpDelta // jae loop // mov ESP, regSpDelta @@ -2370,11 +2365,8 @@ void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, re // be on the guard page. It is OK to leave the final value of ESP on the guard page. GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - // Subtract a page from ESP. This is a trick to avoid the emitter trying to track the - // decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. - inst_Mov(TYP_I_IMPL, regTmp, REG_SPBASE, /* canSkip */ false); - inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); - inst_Mov(TYP_I_IMPL, REG_SPBASE, regTmp, /* canSkip */ false); + // Subtract a page from ESP and hide the adjustment. + inst_RV_IV(INS_sub_hide, REG_SPBASE, compiler->eeGetPageSize(), EA_PTRSIZE); inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); inst_JMP(EJ_jae, loop); @@ -2527,7 +2519,7 @@ void CodeGen::genLclHeap(GenTree* tree) if ((amount > 0) && !initMemOrLargeAlloc) { - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, REG_NA); + lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ true); stackAdjustment = 0; locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize; goto ALLOC_DONE; @@ -2589,7 +2581,7 @@ void CodeGen::genLclHeap(GenTree* tree) // the alloc, not after. assert(amount < compiler->eeGetPageSize()); // must be < not <= - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, regCnt); + lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ regCnt == REG_NA); goto ALLOC_DONE; } @@ -2640,8 +2632,7 @@ void CodeGen::genLclHeap(GenTree* tree) // adds to ESP). inst_RV(INS_NEG, regCnt, TYP_I_IMPL); - regNumber regTmp = tree->GetSingleTempReg(); - genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp); + genStackPointerDynamicAdjustmentWithProbe(regCnt); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. @@ -2661,11 +2652,11 @@ void CodeGen::genLclHeap(GenTree* tree) (stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > compiler->eeGetPageSize())) { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, REG_NA); + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* canTrack */ true); } else { - genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, REG_NA); + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* canTrack */ true); } } @@ -7856,7 +7847,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, REG_NA); + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* canTrack */ true); } else { From 17e797add65c0388eec3a23fb3c0f53f737abd3f Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 24 Oct 2022 16:29:50 -0700 Subject: [PATCH 08/14] Formatting --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegenxarch.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index bc49a44e7bd27..6b489a37490f2 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1435,7 +1435,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack); target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack); void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta); -#else // !TARGET_XARCH +#else // !TARGET_XARCH void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp); void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, regNumber regTmp); target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, regNumber regTmp); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 1979f6f16b51c..013a2374c5886 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2581,7 +2581,8 @@ void CodeGen::genLclHeap(GenTree* tree) // the alloc, not after. assert(amount < compiler->eeGetPageSize()); // must be < not <= - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ regCnt == REG_NA); + lastTouchDelta = + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ regCnt == REG_NA); goto ALLOC_DONE; } From 1a266acc1158dd90c889b4bf0222925848c0b506 Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 25 Oct 2022 11:28:31 -0700 Subject: [PATCH 09/14] Feedback - rename 'canTrack' to 'trackSpAdjustments' --- src/coreclr/jit/codegen.h | 6 +++--- src/coreclr/jit/codegenxarch.cpp | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6b489a37490f2..0df8859f4ad4e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1431,9 +1431,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genReturn(GenTree* treeNode); #ifdef TARGET_XARCH - void genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack); - void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack); - target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack); + void genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments); + void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments); + target_ssize_t genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments); void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta); #else // !TARGET_XARCH void genStackPointerConstantAdjustment(ssize_t spDelta, regNumber regTmp); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 013a2374c5886..f2ed76a18f571 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2216,12 +2216,12 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // // Arguments: // spDelta - the value to add to SP. Must be negative or zero. -// canTrack - x86 only: whether or not to track the SP adjustment +// trackSpAdjustments - x86 only: whether or not to track the SP adjustment // // Return Value: // None. // -void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack) +void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool trackSpAdjustments) { assert(spDelta < 0); @@ -2231,10 +2231,10 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack) #ifdef TARGET_AMD64 // We always track the SP adjustment on X64. - canTrack = true; + trackSpAdjustments = true; #endif // TARGET_AMD64 - if (canTrack) + if (trackSpAdjustments) { inst_RV_IV(INS_sub, REG_SPBASE, (target_ssize_t)-spDelta, EA_PTRSIZE); } @@ -2253,15 +2253,15 @@ void CodeGen::genStackPointerConstantAdjustment(ssize_t spDelta, bool canTrack) // Arguments: // spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, // but the stack pointer doesn't move. -// canTrack - x86 only: whether or not to track the SP adjustment +// trackSpAdjustments - x86 only: whether or not to track the SP adjustment // // Return Value: // None. // -void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool canTrack) +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool trackSpAdjustments) { GetEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - genStackPointerConstantAdjustment(spDelta, canTrack); + genStackPointerConstantAdjustment(spDelta, trackSpAdjustments); } //------------------------------------------------------------------------ @@ -2275,12 +2275,12 @@ void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool c // // Arguments: // spDelta - the value to add to SP. Must be negative. -// canTrack - x86 only: whether or not to track the SP adjustment +// trackSpAdjustments - x86 only: whether or not to track the SP adjustment // // Return Value: // Offset in bytes from SP to last probed address. // -target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool canTrack) +target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, bool trackSpAdjustments) { assert(spDelta < 0); @@ -2290,7 +2290,7 @@ target_ssize_t CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t s do { ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); - genStackPointerConstantAdjustmentWithProbe(spOneDelta, canTrack); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, trackSpAdjustments); spRemainingDelta -= spOneDelta; } while (spRemainingDelta < 0); @@ -2519,7 +2519,7 @@ void CodeGen::genLclHeap(GenTree* tree) if ((amount > 0) && !initMemOrLargeAlloc) { - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ true); + lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true); stackAdjustment = 0; locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize; goto ALLOC_DONE; @@ -2582,7 +2582,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(amount < compiler->eeGetPageSize()); // must be < not <= lastTouchDelta = - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* canTrack */ regCnt == REG_NA); + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ regCnt == REG_NA); goto ALLOC_DONE; } @@ -2653,11 +2653,11 @@ void CodeGen::genLclHeap(GenTree* tree) (stackAdjustment + (target_size_t)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > compiler->eeGetPageSize())) { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* canTrack */ true); + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true); } else { - genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* canTrack */ true); + genStackPointerConstantAdjustment(-(ssize_t)stackAdjustment, /* trackSpAdjustments */ true); } } @@ -7848,7 +7848,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) { - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* canTrack */ true); + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* trackSpAdjustments */ true); } else { From 1801934c6f969800a7cd47a09a57067c1804322e Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 25 Oct 2022 12:29:36 -0700 Subject: [PATCH 10/14] Remove possible temp register from being assigned in LclHeap --- src/coreclr/jit/codegenxarch.cpp | 5 +++++ src/coreclr/jit/lsraxarch.cpp | 27 +++++++++++++-------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index f2ed76a18f571..d88209c514c0c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2456,6 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree) } else { + assert(tree->AvailableTempRegCount() == 1); regCnt = tree->ExtractTempReg(); // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. @@ -2570,6 +2571,7 @@ void CodeGen::genLclHeap(GenTree* tree) } else { + assert(tree->AvailableTempRegCount() == 1); regCnt = tree->ExtractTempReg(); } } @@ -2598,6 +2600,9 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } + // We should not have a temp register available at this point. + assert(tree->AvailableTempRegCount() == 0); + if (compiler->info.compInitMem) { // At this point 'regCnt' is set to the number of loop iterations for this loop, if each diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index e248620825a9c..940a6d68858c5 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1650,10 +1650,10 @@ int LinearScan::BuildLclHeap(GenTree* tree) // const and <=6 reg words - 0 (pushes '0') // const and >6 reg words Yes 0 (pushes '0') // const and =PageSize No 2 (regCnt and tmpReg for subtracing from sp) + // + // const and >=PageSize No 1 (regCnt) // Non-const Yes 0 (regCnt=targetReg and pushes '0') - // Non-const No 2 (regCnt and tmpReg for subtracting from sp) + // Non-const No 1 (regCnt) // // Note: Here we don't need internal register to be different from targetReg. // Rather, require it to be different from operand's reg. @@ -1667,6 +1667,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) if (sizeVal == 0) { + // For regCnt buildInternalIntRegisterDefForNode(tree); } else @@ -1679,22 +1680,20 @@ int LinearScan::BuildLclHeap(GenTree* tree) // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc) // we will generate 'push 0'. assert((sizeVal % REGSIZE_BYTES) == 0); + if (!compiler->info.compInitMem) { - // No need to initialize allocated stack space. - if (sizeVal < compiler->eeGetPageSize()) - { #ifdef TARGET_X86 - // x86 needs a register here to avoid generating "sub" on ESP. - buildInternalIntRegisterDefForNode(tree); -#endif - } - else + // x86 always needs regCnt. + // For regCnt + buildInternalIntRegisterDefForNode(tree); +#else // !TARGET_X86 + if (sizeVal >= compiler->eeGetPageSize()) { - // We need two registers: regCnt and RegTmp - buildInternalIntRegisterDefForNode(tree); + // For regCnt buildInternalIntRegisterDefForNode(tree); } +#endif // !TARGET_X86 } } } @@ -1702,7 +1701,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) { if (!compiler->info.compInitMem) { - buildInternalIntRegisterDefForNode(tree); + // For regCnt buildInternalIntRegisterDefForNode(tree); } BuildUse(size); From 5198752517cd25e87ce323e4809ee3fff8d3d35b Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 25 Oct 2022 13:19:25 -0700 Subject: [PATCH 11/14] Formatting --- src/coreclr/jit/codegenxarch.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index d88209c514c0c..ff7c986b5b87f 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2520,7 +2520,8 @@ void CodeGen::genLclHeap(GenTree* tree) if ((amount > 0) && !initMemOrLargeAlloc) { - lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true); + lastTouchDelta = + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ true); stackAdjustment = 0; locAllocStackOffset = (target_size_t)compiler->lvaOutgoingArgSpaceSize; goto ALLOC_DONE; @@ -2583,8 +2584,8 @@ void CodeGen::genLclHeap(GenTree* tree) // the alloc, not after. assert(amount < compiler->eeGetPageSize()); // must be < not <= - lastTouchDelta = - genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, /* trackSpAdjustments */ regCnt == REG_NA); + lastTouchDelta = genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, + /* trackSpAdjustments */ regCnt == REG_NA); goto ALLOC_DONE; } From 8c55d6ed46f75020101b87b32e62505d771129e9 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Wed, 26 Oct 2022 09:51:38 -0700 Subject: [PATCH 12/14] Update src/coreclr/jit/codegenxarch.cpp Co-authored-by: Bruce Forstall --- src/coreclr/jit/codegenxarch.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index ff7c986b5b87f..bc8d885861464 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2572,8 +2572,7 @@ void CodeGen::genLclHeap(GenTree* tree) } else { - assert(tree->AvailableTempRegCount() == 1); - regCnt = tree->ExtractTempReg(); + regCnt = tree->GetSingleTempReg(); } } From 2cf0649de6da5870670f142925def52842481094 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Wed, 26 Oct 2022 09:51:46 -0700 Subject: [PATCH 13/14] Update src/coreclr/jit/codegenxarch.cpp Co-authored-by: Bruce Forstall --- src/coreclr/jit/codegenxarch.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index bc8d885861464..7a63fb34a0011 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2456,8 +2456,7 @@ void CodeGen::genLclHeap(GenTree* tree) } else { - assert(tree->AvailableTempRegCount() == 1); - regCnt = tree->ExtractTempReg(); + regCnt = tree->GetSingleTempReg(); // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. inst_Mov(size->TypeGet(), regCnt, targetReg, /* canSkip */ true); From 023262f0de4a28bbedd6b67cd82b1345ccb78bc3 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 28 Oct 2022 13:08:36 -0700 Subject: [PATCH 14/14] Update codegenxarch.cpp --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7a63fb34a0011..9cbb8fc3de37e 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2599,7 +2599,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((size_t)(int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } - // We should not have a temp register available at this point. + // We should not have any temp registers at this point. assert(tree->AvailableTempRegCount() == 0); if (compiler->info.compInitMem)