From 564c75d3d9d78e5be642620e75084fdbabca979d Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Tue, 17 May 2022 09:45:51 -0700 Subject: [PATCH 1/2] Implemented fake code splitting in JIT for testing without VM --- src/coreclr/jit/compiler.cpp | 9 ++++++++- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/ee_il_dll.cpp | 32 +++++++++++++++++++++++++++++++ src/coreclr/jit/emit.cpp | 5 +++-- src/coreclr/jit/jitconfigvalues.h | 5 +++++ 5 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 9584af664cfe8..5178c23aa9e59 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3193,7 +3193,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #endif #endif // DEBUG - opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT); + opts.compProcedureSplitting = + jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || JitConfig.JitFakeProcedureSplitting(); #ifdef TARGET_ARM64 // TODO-ARM64-NYI: enable hot/cold splitting @@ -3235,6 +3236,12 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #endif } + // JitFakeProcedureSplitting overrides JitNoProcedureSplitting with a fake splitting implementation + if (JitConfig.JitFakeProcedureSplitting()) + { + opts.compProcedureSplitting = true; + } + #ifdef DEBUG // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30)) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 7d79862592ffe..057143851c5f1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7602,6 +7602,8 @@ class Compiler // ICorJitInfo wrappers + void eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest); + void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize); void eeAllocUnwindInfo(BYTE* pHotCode, diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index 
ba822b63527fa..cc2572e890473 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1122,8 +1122,34 @@ void Compiler::eeDispLineInfos() * (e.g., host AMD64, target ARM64), then VM will get confused anyway. */ +void Compiler::eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest) +{ + // Fake splitting implementation: hot section = hot code + 4K buffer + cold code + const UNATIVE_OFFSET buffer = 4096; + if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) + { + args->hotCodeSize = hotSizeRequest + buffer + coldSizeRequest; + args->coldCodeSize = 0; + } + + info.compCompHnd->allocMem(args); + + // Fix up hot/cold code pointers + if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) + { + args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + buffer; + args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + buffer; + } +} + void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize) { + // Fake splitting currently does not handle unwind info for cold code + if (isColdCode && JitConfig.JitFakeProcedureSplitting()) + { + return; + } + #ifdef DEBUG if (verbose) { @@ -1146,6 +1172,12 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode, BYTE* pUnwindBlock, CorJitFuncKind funcKind) { + // Fake splitting currently does not handle unwind info for cold code + if (pColdCode && JitConfig.JitFakeProcedureSplitting()) + { + return; + } + #ifdef DEBUG if (verbose) { diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index a337b0a77f4ce..5748832702e19 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6046,7 +6046,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitCmpHandle->allocMem(&args); + emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = 
(BYTE*)args.hotCodeBlockRW; @@ -6064,7 +6064,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitCmpHandle->allocMem(&args); + emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; @@ -6318,6 +6318,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, assert(coldCodeBlock); cp = coldCodeBlock; writeableOffset = coldCodeBlockRW - coldCodeBlock; + emitOffsAdj = 0; #ifdef DEBUG if (emitComp->opts.disAsm || emitComp->verbose) { diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index f1f1be807f4b2..44ecaf94d4208 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -192,6 +192,11 @@ CONFIG_INTEGER(JitDumpAtOSROffset, W("JitDumpAtOSROffset"), -1) // Only dump OSR CONFIG_INTEGER(JitDumpInlinePhases, W("JitDumpInlinePhases"), 1) // Dump inline compiler phases CONFIG_METHODSET(JitEHDump, W("JitEHDump")) // Dump the EH table for the method, as reported to the VM CONFIG_METHODSET(JitExclude, W("JitExclude")) +CONFIG_INTEGER(JitFakeProcedureSplitting, W("JitFakeProcedureSplitting"), 0) // Do code splitting independent of VM. + // For now, this disables unwind info for + // cold sections, breaking stack walks. + // Set COMPlus_GCgen0size=1000000 to avoid + // running the GC and breaking things. CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting")) CONFIG_METHODSET(JitGCDump, W("JitGCDump")) CONFIG_METHODSET(JitDebugDump, W("JitDebugDump")) From 5965366c7e88f7a1eff4c16d771b56700b118acf Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Tue, 24 May 2022 11:46:17 -0700 Subject: [PATCH 2/2] Implement stress mode for hot/cold splitting Implementation splits after first basic block in method, assuming there is more than one block. 
Accompanying this implementation are the following fixes: - Loop alignment is disabled for cold blocks, as moving blocks into the cold section may invalidate the initial decision to align. - Long jumps are no longer reduced to short jumps if crossing hot/cold sections. --- src/coreclr/jit/compiler.cpp | 19 +++---- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/ee_il_dll.cpp | 49 ++++++++++------- src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/fgopt.cpp | 5 +- src/coreclr/jit/flowgraph.cpp | 87 ++++++++++++++++++------------- src/coreclr/jit/jitconfigvalues.h | 6 ++- 7 files changed, 104 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 0a7e0d930c3a6..00dfb10b8072b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3186,15 +3186,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC); + bool enableFakeSplitting = false; + #ifdef DEBUG + enableFakeSplitting = JitConfig.JitFakeProcedureSplitting(); + #if defined(TARGET_XARCH) // Whether encoding of absolute addr as PC-rel offset is enabled opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0); #endif #endif // DEBUG - opts.compProcedureSplitting = - jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || JitConfig.JitFakeProcedureSplitting(); + opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting; #ifdef TARGET_ARM64 // TODO-ARM64-NYI: enable hot/cold splitting @@ -3208,7 +3211,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (opts.compProcedureSplitting) { // Note that opts.compdbgCode is true under ngen for checked assemblies! - opts.compProcedureSplitting = !opts.compDbgCode; + opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting; #ifdef DEBUG // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies. 
@@ -3236,13 +3239,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #endif } - // JitFakeProcedureSplitting overrides JitNoProcedureSplitting with a fake splitting implementation - if (JitConfig.JitFakeProcedureSplitting()) - { - opts.compProcedureSplitting = true; - } - #ifdef DEBUG + // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30)) { @@ -5192,6 +5190,9 @@ void Compiler::placeLoopAlignInstructions() if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign())) { + // Loop alignment is disabled for cold blocks + assert((block->bbFlags & BBF_COLD) == 0); + // If jmp was not found, then block before the loop start is where align instruction will be added. if (bbHavingAlign == nullptr) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 74b059b1c6506..599583afdeb53 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7613,7 +7613,7 @@ class Compiler // ICorJitInfo wrappers - void eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest); + void eeAllocMem(AllocMemArgs* args); void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize); diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index cc2572e890473..c7ac7b32e5e99 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1122,40 +1122,52 @@ void Compiler::eeDispLineInfos() * (e.g., host AMD64, target ARM64), then VM will get confused anyway. 
*/ -void Compiler::eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest) +void Compiler::eeAllocMem(AllocMemArgs* args) { +#ifdef DEBUG // Fake splitting implementation: hot section = hot code + 4K buffer + cold code - const UNATIVE_OFFSET buffer = 4096; + const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize; + const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize; + const UNATIVE_OFFSET fakeSplittingBuffer = 4096; + if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) { - args->hotCodeSize = hotSizeRequest + buffer + coldSizeRequest; + args->hotCodeSize = hotSizeRequest + fakeSplittingBuffer + coldSizeRequest; args->coldCodeSize = 0; } +#endif info.compCompHnd->allocMem(args); - // Fix up hot/cold code pointers +#ifdef DEBUG if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) { - args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + buffer; - args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + buffer; + // Fix up hot/cold code pointers + args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + fakeSplittingBuffer; + args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + fakeSplittingBuffer; + + // Reset args' hot/cold code sizes in case caller reads them later + args->hotCodeSize = hotSizeRequest; + args->coldCodeSize = coldSizeRequest; } +#endif } void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize) { - // Fake splitting currently does not handle unwind info for cold code - if (isColdCode && JitConfig.JitFakeProcedureSplitting()) - { - return; - } - #ifdef DEBUG if (verbose) { printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "true" : "false", isColdCode ? 
"true" : "false", unwindSize); } + + // Fake splitting currently does not handle unwind info for cold code + if (isColdCode && JitConfig.JitFakeProcedureSplitting()) + { + JITDUMP("reserveUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n"); + return; + } #endif // DEBUG if (info.compMatchedVM) @@ -1172,12 +1184,6 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode, BYTE* pUnwindBlock, CorJitFuncKind funcKind) { - // Fake splitting currently does not handle unwind info for cold code - if (pColdCode && JitConfig.JitFakeProcedureSplitting()) - { - return; - } - #ifdef DEBUG if (verbose) { @@ -1201,6 +1207,13 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode, } printf(")\n"); } + + // Fake splitting currently does not handle unwind info for cold code + if (pColdCode && JitConfig.JitFakeProcedureSplitting()) + { + JITDUMP("allocUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n"); + return; + } #endif // DEBUG if (info.compMatchedVM) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 5f040b4c21b32..10c5c096f8934 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6065,7 +6065,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); + emitComp->eeAllocMem(&args); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; @@ -6083,7 +6083,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); + emitComp->eeAllocMem(&args); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index a243711cb1c3d..6caaf338ffea9 100644 --- a/src/coreclr/jit/fgopt.cpp +++ 
b/src/coreclr/jit/fgopt.cpp @@ -5963,9 +5963,10 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication) (bNext != nullptr) && // block is not the last block (bNext->bbRefs == 1) && // No other block jumps to bNext (bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block - bNext->isEmpty() && // and it is an an empty block + bNext->isEmpty() && // and it is an empty block (bNext != bNext->bbJumpDest) && // special case for self jumps - (bDest != fgFirstColdBlock)) + (bDest != fgFirstColdBlock) && + (!fgInDifferentRegions(block, bDest))) // do not cross hot/cold sections { // case (a) // diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 7782c35597ed7..3414bdd0eaea3 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3414,49 +3414,64 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() BasicBlock* block; BasicBlock* lblk; - for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) - { - bool blockMustBeInHotSection = false; + bool forceSplit = false; -#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION - if (bbIsHandlerBeg(block)) - { - blockMustBeInHotSection = true; - } -#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION +#ifdef DEBUG + // If stress-splitting, split right after the first block; don't handle functions with EH + forceSplit = JitConfig.JitStressProcedureSplitting() && (compHndBBtabCount == 0); +#endif - // Do we have a candidate for the first cold block? - if (firstColdBlock != nullptr) + if (forceSplit) + { + firstColdBlock = fgFirstBB->bbNext; + prevToFirstColdBlock = fgFirstBB; + } + else + { + for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) { - // We have a candidate for first cold block + bool blockMustBeInHotSection = false; - // Is this a hot block? 
- if (blockMustBeInHotSection || (block->isRunRarely() == false)) +#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION + if (bbIsHandlerBeg(block)) { - // We have to restart the search for the first cold block - firstColdBlock = nullptr; - prevToFirstColdBlock = nullptr; + blockMustBeInHotSection = true; } - } - else // (firstColdBlock == NULL) - { - // We don't have a candidate for first cold block +#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION - // Is this a cold block? - if (!blockMustBeInHotSection && (block->isRunRarely() == true)) + // Do we have a candidate for the first cold block? + if (firstColdBlock != nullptr) { - // - // If the last block that was hot was a BBJ_COND - // then we will have to add an unconditional jump - // so the code size for block needs be large - // enough to make it worth our while - // - if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8)) + // We have a candidate for first cold block + + // Is this a hot block? + if (blockMustBeInHotSection || (block->isRunRarely() == false)) { - // This block is now a candidate for first cold block - // Also remember the predecessor to this block - firstColdBlock = block; - prevToFirstColdBlock = lblk; + // We have to restart the search for the first cold block + firstColdBlock = nullptr; + prevToFirstColdBlock = nullptr; + } + } + else // (firstColdBlock == NULL) + { + // We don't have a candidate for first cold block + + // Is this a cold block? 
+ if (!blockMustBeInHotSection && (block->isRunRarely() == true)) + { + // + // If the last block that was hot was a BBJ_COND + // then we will have to add an unconditional jump + // so the code size for block needs to be large + // enough to make it worth our while + // + if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8)) + { + // This block is now a candidate for first cold block + // Also remember the predecessor to this block + firstColdBlock = block; + prevToFirstColdBlock = lblk; + } } } } @@ -3483,8 +3498,9 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() // then it may not be worth it to move it // into the Cold section as a jump to the // Cold section is 5 bytes in size. + // Ignore if stress-splitting. // - if (firstColdBlock->bbNext == nullptr) + if (!forceSplit && firstColdBlock->bbNext == nullptr) { // If the size of the cold block is 7 or less // then we will keep it in the Hot section. @@ -3557,6 +3573,7 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() for (block = firstColdBlock; block != nullptr; block = block->bbNext) { block->bbFlags |= BBF_COLD; + block->unmarkLoopAlign(this DEBUG_ARG("Loop alignment disabled for cold blocks")); } EXIT:; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 2cc1edee74fe7..29e977cf1d8c9 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -164,6 +164,9 @@ CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Intern // stress. CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable // stress modes listed in JitStressModeNames +CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic + // block. Skips functions with EH + // for simplicity.
CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0) CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range @@ -196,7 +199,8 @@ CONFIG_INTEGER(JitFakeProcedureSplitting, W("JitFakeProcedureSplitting"), 0) // // For now, this disables unwind info for // cold sections, breaking stack walks. // Set COMPlus_GCgen0size=1000000 to avoid - // running the GC and breaking things. + // running the GC, which requires + // stack-walking. CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting")) CONFIG_METHODSET(JitGCDump, W("JitGCDump")) CONFIG_METHODSET(JitDebugDump, W("JitDebugDump"))