diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 8e82707adbbbf..0c812ffd935db 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4697,11 +4697,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         return;
     }
 
-    // Drop back to just checking profile likelihoods.
-    //
-    activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
-    activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
-
     // At this point in the phase list, all the inlinee phases have
     // been run, and inlinee compiles have exited, so we should only
     // get this far if we are jitting the root method.
@@ -4718,6 +4713,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     // Record "start" values for post-inlining cycles and elapsed time.
     RecordStateAtEndOfInlining();
 
+    // Drop back to just checking profile likelihoods.
+    //
+    activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
+    activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
+
     // Transform each GT_ALLOCOBJ node into either an allocation helper call or
     // local variable allocation on the stack.
     ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 050f2e001ea29..1c6950a55296c 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -5284,6 +5284,7 @@ class Compiler
 
     // The number of separate return points in the method.
     unsigned fgReturnCount;
+    unsigned fgThrowCount;
 
     PhaseStatus fgAddInternal();
 
@@ -6228,7 +6229,7 @@ class Compiler
 
     void fgLinkBasicBlocks();
 
-    unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
+    void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
 
     void fgCheckBasicBlockControlFlow();
 
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index af02d257d5dc2..538c49e530ecb 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -50,6 +50,7 @@ void Compiler::fgInit()
     fgDomBBcount      = 0;
     fgBBVarSetsInited = false;
     fgReturnCount     = 0;
+    fgThrowCount      = 0;
 
     m_dfsTree = nullptr;
     m_loops   = nullptr;
@@ -233,8 +234,9 @@ bool Compiler::fgEnsureFirstBBisScratch()
         {
             // If the result is clearly nonsensical, just inherit
             //
-            JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n",
-                    fgPgoConsistent ? "is now" : "was already");
+            JITDUMP(
+                "\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsistent.\n",
+                fgPgoConsistent ? "is now" : "was already");
 
             if (fgPgoConsistent)
             {
@@ -3099,19 +3101,18 @@ void Compiler::fgLinkBasicBlocks()
 //   codeSize -- length of the IL stream
 //   jumpTarget -- [in] bit vector of jump targets found by fgFindJumpTargets
 //
-// Returns:
-//   number of return blocks (BBJ_RETURN) in the method (may be zero)
-//
 // Notes:
-//   Invoked for prejited and jitted methods, and for all inlinees
-
-unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
+//   Invoked for prejitted and jitted methods, and for all inlinees.
+//   Sets fgReturnCount and fgThrowCount
+//
+void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
 {
-    unsigned    retBlocks = 0;
-    const BYTE* codeBegp  = codeAddr;
-    const BYTE* codeEndp  = codeAddr + codeSize;
-    bool        tailCall  = false;
-    unsigned    curBBoffs = 0;
+    unsigned    retBlocks   = 0;
+    unsigned    throwBlocks = 0;
+    const BYTE* codeBegp    = codeAddr;
+    const BYTE* codeEndp    = codeAddr + codeSize;
+    bool        tailCall    = false;
+    unsigned    curBBoffs   = 0;
     BasicBlock* curBBdesc;
 
     // Keep track of where we are in the scope lists, as we will also
@@ -3312,7 +3313,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                     // can be dispatched as tail calls from the caller.
                     compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
                     retBlocks++;
-                    return retBlocks;
+                    fgReturnCount = retBlocks;
+                    return;
                 }
 
                 FALLTHROUGH;
@@ -3415,6 +3417,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
 
             case CEE_THROW:
             case CEE_RETHROW:
+                throwBlocks++;
                 jmpKind = BBJ_THROW;
                 break;
 
@@ -3597,7 +3600,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
 
     fgLinkBasicBlocks();
 
-    return retBlocks;
+    fgReturnCount = retBlocks;
+    fgThrowCount  = throwBlocks;
 }
 
 /*****************************************************************************
@@ -3709,7 +3713,7 @@ void Compiler::fgFindBasicBlocks()
 
     /* Now create the basic blocks */
 
-    fgReturnCount = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
+    fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
 
     if (compIsForInlining())
     {
@@ -4269,7 +4273,7 @@ void Compiler::fgFixEntryFlowForOSR()
     //
     if ((fgEntryBB->bbPreds != nullptr) && (fgEntryBB != fgOSREntryBB))
     {
-        JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsisent.\n",
+        JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsistent.\n",
                 fgPgoConsistent ? "is now" : "was already");
         fgPgoConsistent = false;
     }
diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp
index 8fcea689c6c99..7a6246a988fda 100644
--- a/src/coreclr/jit/fginline.cpp
+++ b/src/coreclr/jit/fginline.cpp
@@ -624,24 +624,86 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Subst
                         m_compiler->dspTreeID(tree), block->bbNum);
+                m_compiler->Metrics.InlinerBranchFold++;
 
                 // We have a constant operand, and should have the all clear to optimize.
                 // Update side effects on the tree, assert there aren't any, and bash to nop.
                 m_compiler->gtUpdateNodeSideEffects(tree);
                 assert((tree->gtFlags & GTF_SIDE_EFFECT) == 0);
                 tree->gtBashToNOP();
-                m_madeChanges = true;
+                m_madeChanges         = true;
+                FlowEdge* removedEdge = nullptr;
 
                 if (condTree->IsIntegralConst(0))
                 {
-                    m_compiler->fgRemoveRefPred(block->GetTrueEdge());
+                    removedEdge = block->GetTrueEdge();
+                    m_compiler->fgRemoveRefPred(removedEdge);
                     block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
                 }
                 else
                 {
-                    m_compiler->fgRemoveRefPred(block->GetFalseEdge());
+                    removedEdge = block->GetFalseEdge();
+                    m_compiler->fgRemoveRefPred(removedEdge);
                     block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
                 }
+
+                // Update profile; make it consistent if possible
+                //
+                if (block->hasProfileWeight())
+                {
+                    bool           repairWasComplete = true;
+                    weight_t const weight            = removedEdge->getLikelyWeight();
+
+                    if (weight > 0)
+                    {
+                        // Target block weight will increase.
+                        //
+                        BasicBlock* const target = block->GetTarget();
+                        assert(target->hasProfileWeight());
+                        target->setBBProfileWeight(target->bbWeight + weight);
+
+                        // Alternate weight will decrease
+                        //
+                        BasicBlock* const alternate = removedEdge->getDestinationBlock();
+                        assert(alternate->hasProfileWeight());
+                        weight_t const alternateNewWeight = alternate->bbWeight - weight;
+
+                        // If profile weights are consistent, expect at worst a slight underflow.
+                        //
+                        if (m_compiler->fgPgoConsistent && (alternateNewWeight < 0))
+                        {
+                            assert(m_compiler->fgProfileWeightsEqual(alternateNewWeight, 0));
+                        }
+                        alternate->setBBProfileWeight(max(0.0, alternateNewWeight));
+
+                        // This will affect profile transitively, so in general
+                        // the profile will become inconsistent.
+                        //
+                        repairWasComplete = false;
+
+                        // But we can check for the special case where the
+                        // block's postdominator is target's target (simple
+                        // if/then/else/join).
+                        //
+                        if (target->KindIs(BBJ_ALWAYS))
+                        {
+                            repairWasComplete =
+                                alternate->KindIs(BBJ_ALWAYS) && alternate->TargetIs(target->GetTarget());
+                        }
+                    }
+
+                    if (!repairWasComplete)
+                    {
+                        JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
+                                m_compiler->fgPgoConsistent ? "is now" : "was already");
+
+                        if (m_compiler->fgPgoConsistent)
+                        {
+                            m_compiler->Metrics.ProfileInconsistentInlinerBranchFold++;
+                            m_compiler->fgPgoConsistent = false;
+                        }
+                    }
+                }
             }
         }
         else
@@ -692,6 +754,11 @@ PhaseStatus Compiler::fgInline()
         JitConfig.JitPrintInlinedMethods().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args);
 #endif // DEBUG
 
+    if (fgPgoConsistent)
+    {
+        Metrics.ProfileConsistentBeforeInline++;
+    }
+
     noway_assert(fgFirstBB != nullptr);
 
     BasicBlock* block = fgFirstBB;
@@ -822,6 +889,14 @@ PhaseStatus Compiler::fgInline()
         fgRenumberBlocks();
     }
 
+    if (fgPgoConsistent)
+    {
+        Metrics.ProfileConsistentAfterInline++;
+    }
+
+    Metrics.InlineCount   = m_inlineStrategy->GetInlineCount();
+    Metrics.InlineAttempt = m_inlineStrategy->GetImportCount();
+
     return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
@@ -1611,6 +1686,75 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
     }
 #endif
 
+    // Update profile consistency
+    //
+    // If inlinee is inconsistent, root method will be inconsistent too.
+    //
+    if (!InlineeCompiler->fgPgoConsistent)
+    {
+        if (fgPgoConsistent)
+        {
+            JITDUMP("INLINER: profile data in root now inconsistent -- inlinee had inconsistency\n");
+            Metrics.ProfileInconsistentInlinee++;
+            fgPgoConsistent = false;
+        }
+    }
+
+    // If we inline a no-return call at a site with profile weight,
+    // we will introduce inconsistency.
+    //
+    if (InlineeCompiler->fgReturnCount == 0)
+    {
+        JITDUMP("INLINER: no-return inlinee\n");
+
+        if (iciBlock->bbWeight > 0)
+        {
+            if (fgPgoConsistent)
+            {
+                JITDUMP("INLINER: profile data in root now inconsistent -- no-return inlinee at call site in " FMT_BB
+                        " with weight " FMT_WT "\n",
+                        iciBlock->bbNum, iciBlock->bbWeight);
+                Metrics.ProfileInconsistentNoReturnInlinee++;
+                fgPgoConsistent = false;
+            }
+        }
+        else
+        {
+            // Inlinee scaling should assure this is so.
+            //
+            assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
+        }
+    }
+
+    // If the call site is not in a try and the callee has a throw,
+    // we may introduce inconsistency.
+    //
+    // Technically we should check if the callee has a throw not in a try, but since
+    // we can't inline methods with EH yet we don't see those.
+    //
+    if (InlineeCompiler->fgThrowCount > 0)
+    {
+        JITDUMP("INLINER: may-throw inlinee\n");
+
+        if (iciBlock->bbWeight > 0)
+        {
+            if (fgPgoConsistent)
+            {
+                JITDUMP("INLINER: profile data in root now inconsistent -- may-throw inlinee at call site in " FMT_BB
+                        " with weight " FMT_WT "\n",
+                        iciBlock->bbNum, iciBlock->bbWeight);
+                Metrics.ProfileInconsistentMayThrowInlinee++;
+                fgPgoConsistent = false;
+            }
+        }
+        else
+        {
+            // Inlinee scaling should assure this is so.
+            //
+            assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
+        }
+    }
+
     // If an inlinee needs GS cookie we need to make sure that the cookie will not be allocated at zero stack offset.
     // Note that if the root method needs GS cookie then this has already been taken care of.
     if (!getNeedsGSSecurityCookie() && InlineeCompiler->getNeedsGSSecurityCookie())
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index 6d7ef484fe9a8..1afd6bcda7631 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -141,22 +141,39 @@ void Compiler::fgApplyProfileScale()
         JITDUMP(" ... no callee profile data, will use non-pgo weight to scale\n");
     }
 
-    // Ostensibly this should be fgCalledCount for the callee, but that's not available
-    // as it requires some analysis.
+    // Determine the weight of the first block preds, if any.
+    // (only happens if the first block is a loop head).
     //
-    // For most callees it will be the same as the entry block count.
-    //
-    // Note when/if we early do normalization this may need to change.
+    weight_t firstBlockPredWeight = 0;
+    for (FlowEdge* const firstBlockPred : fgFirstBB->PredEdges())
+    {
+        firstBlockPredWeight += firstBlockPred->getLikelyWeight();
+    }
+
+    // Determine the "input" weight for the callee
     //
     weight_t calleeWeight = fgFirstBB->bbWeight;
 
-    // Callee entry weight is nonzero?
+    // Callee entry weight is zero or negative (taking backedges into account)?
     // If so, just choose the smallest plausible weight.
     //
-    if (calleeWeight == BB_ZERO_WEIGHT)
+    if (calleeWeight <= firstBlockPredWeight)
     {
         calleeWeight = fgHaveProfileWeights() ? 1.0 : BB_UNITY_WEIGHT;
-        JITDUMP(" ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight);
+        JITDUMP(" ... callee entry has zero or negative weight, will use weight of " FMT_WT " to scale\n",
+                calleeWeight);
+        JITDUMP("Profile data could not be scaled consistently. Data %s inconsistent.\n",
+                fgPgoConsistent ? "is now" : "was already");
+
+        if (fgPgoConsistent)
+        {
+            Metrics.ProfileInconsistentInlineeScale++;
+            fgPgoConsistent = false;
+        }
+    }
+    else
+    {
+        calleeWeight -= firstBlockPredWeight;
     }
 
     // Call site has profile weight?
@@ -2829,6 +2846,14 @@ PhaseStatus Compiler::fgInstrumentMethod()
 //
 PhaseStatus Compiler::fgIncorporateProfileData()
 {
+    // For now we only rely on profile data when optimizing.
+    //
+    if (!opts.OptimizationEnabled())
+    {
+        JITDUMP("not optimizing, so not incorporating any profile data\n");
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
     // Are we doing profile stress?
     //
     if (fgStressBBProf() > 0)
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 4998dbd6293fa..97aa8af7f2d5f 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -5136,10 +5136,14 @@ void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
         // We are unlikely to be able to repair the profile.
         // For now we don't even try.
        //
-        JITDUMP("\nimpResetLeaveBlock: Profile data could not be locally repaired. Data %s inconsisent.\n",
+        JITDUMP("\nimpResetLeaveBlock: Profile data could not be locally repaired. Data %s inconsistent.\n",
                 fgPgoConsistent ? "is now" : "was already");
-        fgPgoConsistent = false;
-        Metrics.ProfileInconsistentResetLeave++;
+
+        if (fgPgoConsistent)
+        {
+            Metrics.ProfileInconsistentResetLeave++;
+            fgPgoConsistent = false;
+        }
     }
 }
 
@@ -7371,8 +7375,12 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                 {
                     JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
                             fgPgoConsistent ? "is now" : "was already");
-                    fgPgoConsistent = false;
-                    Metrics.ProfileInconsistentImporterBranchFold++;
+
+                    if (fgPgoConsistent)
+                    {
+                        Metrics.ProfileInconsistentImporterBranchFold++;
+                        fgPgoConsistent = false;
+                    }
                 }
             }
         }
@@ -7657,10 +7665,14 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     // We are unlikely to be able to repair the profile.
                     // For now we don't even try.
                    //
-                    JITDUMP("Profile data could not be locally repaired. Data %s inconsisent.\n",
+                    JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
                             fgPgoConsistent ? "is now" : "was already");
-                    fgPgoConsistent = false;
-                    Metrics.ProfileInconsistentImporterSwitchFold++;
+
+                    if (fgPgoConsistent)
+                    {
+                        Metrics.ProfileInconsistentImporterSwitchFold++;
+                        fgPgoConsistent = false;
+                    }
                 }
 
                 // Create a NOP node
diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp
index 3f0c311e84a61..92cac9245fc63 100644
--- a/src/coreclr/jit/indirectcalltransformer.cpp
+++ b/src/coreclr/jit/indirectcalltransformer.cpp
@@ -1248,8 +1248,14 @@ class IndirectCallTransformer
 
             if (!isReasonableUnderflow)
             {
-                compiler->fgPgoConsistent = false;
-                compiler->Metrics.ProfileInconsistentChainedGDV++;
+                JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
+                        compiler->fgPgoConsistent ? "is now" : "was already");
+
+                if (compiler->fgPgoConsistent)
+                {
+                    compiler->Metrics.ProfileInconsistentChainedGDV++;
+                    compiler->fgPgoConsistent = false;
+                }
             }
         }
 
diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h
index 8c1cb56124ad2..a2aeba6fed725 100644
--- a/src/coreclr/jit/inline.h
+++ b/src/coreclr/jit/inline.h
@@ -993,6 +993,12 @@ class InlineStrategy
         m_ImportCount++;
     }
 
+    // Return number of import attempts
+    unsigned GetImportCount() const
+    {
+        return m_ImportCount;
+    }
+
     // Inform strategy about the inline decision for a prejit root
     void NotePrejitDecision(const InlineResult& r)
     {
diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h
index 43afa6384d4c6..e077f9f57b255 100644
--- a/src/coreclr/jit/jitmetadatalist.h
+++ b/src/coreclr/jit/jitmetadatalist.h
@@ -53,6 +53,11 @@ JITMETADATAMETRIC(ClassGDV, int, 0)
 JITMETADATAMETRIC(MethodGDV, int, 0)
 JITMETADATAMETRIC(MultiGuessGDV, int, 0)
 JITMETADATAMETRIC(ChainedGDV, int, 0)
+JITMETADATAMETRIC(InlinerBranchFold, int, 0)
+JITMETADATAMETRIC(InlineAttempt, int, 0)
+JITMETADATAMETRIC(InlineCount, int, 0)
+JITMETADATAMETRIC(ProfileConsistentBeforeInline, int, 0)
+JITMETADATAMETRIC(ProfileConsistentAfterInline, int, 0)
 JITMETADATAMETRIC(ProfileSynthesizedBlendedOrRepaired, int, 0)
 JITMETADATAMETRIC(ProfileInconsistentInitially, int, 0)
 JITMETADATAMETRIC(ProfileInconsistentResetLeave, int, 0)
@@ -60,6 +65,11 @@ JITMETADATAMETRIC(ProfileInconsistentImporterBranchFold, int, 0)
 JITMETADATAMETRIC(ProfileInconsistentImporterSwitchFold, int, 0)
 JITMETADATAMETRIC(ProfileInconsistentChainedGDV, int, 0)
 JITMETADATAMETRIC(ProfileInconsistentScratchBB, int, 0)
+JITMETADATAMETRIC(ProfileInconsistentInlinerBranchFold, int, 0)
+JITMETADATAMETRIC(ProfileInconsistentInlineeScale, int, 0)
+JITMETADATAMETRIC(ProfileInconsistentInlinee, int, 0)
+JITMETADATAMETRIC(ProfileInconsistentNoReturnInlinee, int, 0)
+JITMETADATAMETRIC(ProfileInconsistentMayThrowInlinee, int, 0)
 
 #undef JITMETADATA
 #undef JITMETADATAINFO