diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 26e731252a39f..69017ea2dfb54 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -328,6 +328,60 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk) return res; } +//------------------------------------------------------------------------ +// IsInsertedSsaLiveIn: See if a local is marked as being live-in to a block in +// the side table with locals inserted into SSA. +// +// Arguments: +// block - The block +// lclNum - The local +// +// Returns: +// True if the local is marked as live-in to that block +// +bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + assert(lvaGetDesc(lclNum)->lvInSsa); + + if (m_insertedSsaLocalsLiveIn == nullptr) + { + return false; + } + + return m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum)); +} + +//------------------------------------------------------------------------ +// AddInsertedSsaLiveIn: Mark a local that was inserted into SSA as being +// live-in to a block. +// +// Arguments: +// block - The block +// lclNum - The local +// +// Returns: +// True if this was added anew; false if the local was already marked as such. +// +bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + // SSA-inserted locals always have explicit reaching defs for all uses, so + // it never makes sense for them to be live into the first block. 
+ assert(block != fgFirstBB); + + if (m_insertedSsaLocalsLiveIn == nullptr) + { + m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA)); + } + + if (m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite)) + { + return false; + } + + JITDUMP("Marked V%02u as live into " FMT_BB "\n", lclNum, block->bbNum); + return true; +} + //------------------------------------------------------------------------ // IsLastHotBlock: see if this is the last block before the cold section // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3d2f568301bfa..2a920afec06a2 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4978,6 +4978,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables); } + fgInvalidateDfsTree(); + if (doVNBasedDeadStoreRemoval) { // Note: this invalidates SSA and value numbers on tree nodes. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4112ab5014fca..8e2485610aa38 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2387,6 +2387,29 @@ class FlowGraphDominatorTree static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfsTree); }; +class FlowGraphDominanceFrontiers +{ + FlowGraphDominatorTree* m_domTree; + BlkToBlkVectorMap m_map; + BitVecTraits m_poTraits; + BitVec m_visited; + + FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree); + +#ifdef DEBUG + void Dump(); +#endif + +public: + FlowGraphDominatorTree* GetDomTree() + { + return m_domTree; + } + + static FlowGraphDominanceFrontiers* Build(FlowGraphDominatorTree* domTree); + void ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result); +}; + // Represents a reverse mapping from block back to its (most nested) containing loop. 
class BlockToNaturalLoopMap { @@ -2925,6 +2948,35 @@ class Compiler return m_dominancePreds; } + struct BasicBlockLocalPair + { + BasicBlock* Block; + unsigned LclNum; + + BasicBlockLocalPair(BasicBlock* block, unsigned lclNum) + : Block(block) + , LclNum(lclNum) + { + } + + static bool Equals(const BasicBlockLocalPair& x, const BasicBlockLocalPair& y) + { + return (x.Block == y.Block) && (x.LclNum == y.LclNum); + } + static unsigned GetHashCode(const BasicBlockLocalPair& val) + { + unsigned hash = val.Block->bbID; + hash ^= val.LclNum + 0x9e3779b9 + (hash << 19) + (hash >> 13); + return hash; + } + }; + + typedef JitHashTable BasicBlockLocalPairSet; + + BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn = nullptr; + bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + void* ehEmitCookie(BasicBlock* block); UNATIVE_OFFSET ehCodeOffset(BasicBlock* block); @@ -5147,6 +5199,7 @@ class Compiler // Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree // in order to avoid the need for SSA reconstruction and an "out of SSA" phase). FlowGraphDominatorTree* m_domTree = nullptr; + FlowGraphDominanceFrontiers* m_domFrontiers = nullptr; BlockReachabilitySets* m_reachabilitySets = nullptr; // Do we require loops to be in canonical form? The canonical form ensures that: @@ -5419,7 +5472,7 @@ class Compiler void fgMergeBlockReturn(BasicBlock* block); - bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)); + bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), bool invalidateDFSTreeOnFGChange = true); void fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt); bool gtRemoveTreesAfterNoReturnCall(BasicBlock* block, Statement* stmt); @@ -5717,7 +5770,7 @@ class Compiler // The value numbers for this compilation. 
ValueNumStore* vnStore = nullptr; - class ValueNumberState* vnState; + class ValueNumberState* vnState = nullptr; public: ValueNumStore* GetValueNumStore() @@ -7682,6 +7735,7 @@ class Compiler LoopLocalOccurrences* loopLocals); bool optCanAndShouldChangeExitTest(GenTree* cond, bool dump); bool optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); + bool optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block); bool optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); bool optWidenPrimaryIV(FlowGraphNaturalLoop* loop, @@ -12184,7 +12238,7 @@ class DomTreeVisitor public: //------------------------------------------------------------------------ - // WalkTree: Walk the dominator tree. + // WalkTree: Walk the dominator tree starting from the first BB. // // Parameter: // domTree - Dominator tree. diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index c355d02c558c3..16560d52c09bc 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4748,6 +4748,7 @@ void Compiler::fgDebugCheckFlowGraphAnnotations() assert((m_loops == nullptr) || (m_loops->GetDfsTree() == m_dfsTree)); assert((m_domTree == nullptr) || (m_domTree->GetDfsTree() == m_dfsTree)); + assert((m_domFrontiers == nullptr) || (m_domFrontiers->GetDomTree() == m_domTree)); assert((m_reachabilitySets == nullptr) || (m_reachabilitySets->GetDfsTree() == m_dfsTree)); } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d770ddf2630ed..dd4d95ab8356d 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4370,6 +4370,7 @@ void Compiler::fgInvalidateDfsTree() m_dfsTree = nullptr; m_loops = nullptr; m_domTree = nullptr; + m_domFrontiers = nullptr; m_reachabilitySets = nullptr; fgSsaValid = false; } @@ -6603,6 +6604,189 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const 
FlowGraphDfsTree* df return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfsTree, domTree, preorderNums, postorderNums); } +FlowGraphDominanceFrontiers::FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree) + : m_domTree(domTree) + , m_map(domTree->GetDfsTree()->GetCompiler()->getAllocator(CMK_DominatorMemory)) + , m_poTraits(domTree->GetDfsTree()->PostOrderTraits()) + , m_visited(BitVecOps::MakeEmpty(&m_poTraits)) +{ +} + +//------------------------------------------------------------------------ +// FlowGraphDominanceFrontiers::Build: Build the dominance frontiers for all +// blocks. +// +// Parameters: +// domTree - Dominator tree to build dominance frontiers for +// +// Returns: +// Data structure representing dominance frontiers. +// +// Remarks: +// Recall that the dominance frontier of a block B is the set of blocks B3 +// such that there exists some B2 s.t. B3 is a successor of B2, and B +// dominates B2 but not B3. Note that this dominance need not be strict -- B2 +// and B may be the same node. +// +// In other words, a block B' is in DF(B) if B dominates an immediate +// predecessor of B', but does not dominate B'. Intuitively, these blocks are +// the "first" blocks that are no longer dominated by B; these are the places +// we are interested in inserting phi definitions that may refer to defs in +// B. +// +// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy. 
+// +FlowGraphDominanceFrontiers* FlowGraphDominanceFrontiers::Build(FlowGraphDominatorTree* domTree) +{ + const FlowGraphDfsTree* dfsTree = domTree->GetDfsTree(); + Compiler* comp = dfsTree->GetCompiler(); + + FlowGraphDominanceFrontiers* result = new (comp, CMK_DominatorMemory) FlowGraphDominanceFrontiers(domTree); + + for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + { + BasicBlock* block = dfsTree->GetPostOrder(i); + + // Recall that B3 is in the dom frontier of B1 if there exists a B2 + // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor + // of B2. (Note that B1 might be the same block as B2.) + // In that definition, we're considering "block" to be B3, and trying + // to find B1's. To do so, first we consider the predecessors of "block", + // searching for candidate B2's -- "block" is obviously an immediate successor + // of its immediate predecessors. If there are zero or one preds, then there + // is no pred, or else the single pred dominates "block", so no B2 exists. + FlowEdge* blockPreds = comp->BlockPredsWithEH(block); + + // If block has 0/1 predecessor, skip, apart from handler entry blocks + // that are always in the dominance frontier of its enclosed blocks. + if (!comp->bbIsHandlerBeg(block) && ((blockPreds == nullptr) || (blockPreds->getNextPredEdge() == nullptr))) + { + continue; + } + + // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition -- + // *unless* it dominates "block"/B3. + + for (FlowEdge* pred = blockPreds; pred != nullptr; pred = pred->getNextPredEdge()) + { + BasicBlock* predBlock = pred->getSourceBlock(); + + if (!dfsTree->Contains(predBlock)) + { + continue; + } + + // If we've found a B2, then consider the possible B1's. We start with + // B2, since a block dominates itself, then traverse upwards in the dominator + // tree, stopping when we reach the root, or the immediate dominator of "block"/B3. 
+ // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3: + // a predecessor must be dominated by B3's immediate dominator.) + // Along this way, make "block"/B3 part of the dom frontier of the B1. + // When we reach this immediate dominator, the definition no longer applies, since this + // potential B1 *does* dominate "block"/B3, so we stop. + for (BasicBlock* b1 = predBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop + b1 = b1->bbIDom) + { + BlkVector& b1DF = *result->m_map.Emplace(b1, comp->getAllocator(CMK_DominatorMemory)); + // It's possible to encounter the same DF multiple times, ensure that we don't add duplicates. + if (b1DF.empty() || (b1DF.back() != block)) + { + b1DF.push_back(block); + } + } + } + } + + return result; +} + +//------------------------------------------------------------------------ +// ComputeIteratedDominanceFrontier: Compute the iterated dominance frontier of +// a block. This is the transitive closure of taking dominance frontiers. +// +// Parameters: +// block - Block to compute iterated dominance frontier for. +// result - Vector to add blocks of IDF into. +// +// Remarks: +// When we create phi definitions we are creating new definitions that +// themselves induce the creation of more phi nodes. Thus, the transitive +// closure of DF(B) contains all blocks that may have phi definitions +// referring to defs in B, or referring to other phis referring to defs in B. +// +void FlowGraphDominanceFrontiers::ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result) +{ + assert(result->empty()); + + BlkVector* bDF = m_map.LookupPointer(block); + + if (bDF == nullptr) + { + return; + } + + // Compute IDF(b) - start by adding DF(b) to IDF(b). 
+ result->reserve(bDF->size()); + BitVecOps::ClearD(&m_poTraits, m_visited); + + for (BasicBlock* f : *bDF) + { + BitVecOps::AddElemD(&m_poTraits, m_visited, f->bbPostorderNum); + result->push_back(f); + } + + // Now for each block f from IDF(b) add DF(f) to IDF(b). This may result in new + // blocks being added to IDF(b) and the process repeats until no more new blocks + // are added. Note that since we keep adding to bIDF we can't use iterators as + // they may get invalidated. This happens to be a convenient way to avoid having + // to track newly added blocks in a separate set. + for (size_t newIndex = 0; newIndex < result->size(); newIndex++) + { + BasicBlock* f = (*result)[newIndex]; + BlkVector* fDF = m_map.LookupPointer(f); + + if (fDF == nullptr) + { + continue; + } + + for (BasicBlock* ff : *fDF) + { + if (BitVecOps::TryAddElemD(&m_poTraits, m_visited, ff->bbPostorderNum)) + { + result->push_back(ff); + } + } + } +} + +#ifdef DEBUG +//------------------------------------------------------------------------ +// FlowGraphDominanceFrontiers::Dump: Dump a textual representation of the +// dominance frontiers to jitstdout. +// +void FlowGraphDominanceFrontiers::Dump() +{ + printf("DF:\n"); + for (unsigned i = 0; i < m_domTree->GetDfsTree()->GetPostOrderCount(); ++i) + { + BasicBlock* b = m_domTree->GetDfsTree()->GetPostOrder(i); + printf("Block " FMT_BB " := {", b->bbNum); + + BlkVector* bDF = m_map.LookupPointer(b); + if (bDF != nullptr) + { + int index = 0; + for (BasicBlock* f : *bDF) + { + printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); + } + } + printf("}\n"); + } +} +#endif + //------------------------------------------------------------------------ // BlockToNaturalLoopMap::GetLoop: Map a block back to its most nested // containing loop. 
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7130c56630067..4a30e7325e2af 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1222,9 +1222,14 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) } } - if ((result == PhaseStatus::MODIFIED_EVERYTHING) && opts.OptimizationEnabled()) + if (result == PhaseStatus::MODIFIED_EVERYTHING) { - fgRenumberBlocks(); + fgInvalidateDfsTree(); + + if (opts.OptimizationEnabled()) + { + fgRenumberBlocks(); + } } return result; diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 6020506150b9a..06cae5799a0da 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,8 +391,10 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (!optLocalIsLiveIntoBlock(lclNum, exit)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; @@ -422,7 +424,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) block->VisitAllSuccs(this, [=](BasicBlock* succ) { if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ)) { - assert(!VarSetOps::IsMember(this, succ->bbLiveIn, dsc->lvVarIndex) && + assert(!optLocalIsLiveIntoBlock(lclNum, succ) && "Candidate IV for widening is live into exceptional exit"); } @@ -534,8 +536,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, // Now account for the cost of sinks. 
LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + loop->VisitRegularExitBlocks([&](BasicBlock* exit) { - if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (optLocalIsLiveIntoBlock(lclNum, exit)) { savedSize -= ExtensionSize; savedCost -= exit->getBBWeight(this) * ExtensionCost; @@ -583,8 +587,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, void Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (!optLocalIsLiveIntoBlock(lclNum, exit)) { return BasicBlockVisit::Continue; } @@ -1284,13 +1290,14 @@ bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoo return true; } - if (!varDsc->lvTracked) + if (!varDsc->lvTracked && !varDsc->lvInSsa) { + // We do not have liveness we can use for this untracked local. return true; } BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { - if (VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex)) + if (optLocalIsLiveIntoBlock(lclNum, block)) { return BasicBlockVisit::Abort; } @@ -1310,6 +1317,30 @@ bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoo return false; } +//------------------------------------------------------------------------ +// optLocalIsLiveIntoBlock: +// Check if a local is live into a block. Requires liveness information for the local to be present +// (either because of it being tracked, or from being an SSA-inserted local). +// +// Parameters: +// lclNum - The local +// block - The block +// +// Returns: +// True if the local is live into that block. 
+// +bool Compiler::optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block) +{ + LclVarDsc* dsc = lvaGetDesc(lclNum); + if (dsc->lvTracked) + { + return VarSetOps::IsMember(this, block->bbLiveIn, dsc->lvVarIndex); + } + + assert(dsc->lvInSsa); + return IsInsertedSsaLiveIn(block, lclNum); +} + struct CursorInfo { BasicBlock* Block; @@ -2579,9 +2610,21 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; optReachableBitVecTraits = nullptr; - m_dfsTree = fgComputeDfs(); - m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + if (m_dfsTree == nullptr) + { + m_dfsTree = fgComputeDfs(); + } + + if (m_domTree == nullptr) + { + m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); + } + + if (m_loops == nullptr) + { + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } LoopLocalOccurrences loopLocals(m_loops); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 685d74944edcf..0f27d1b3408b6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12840,9 +12840,11 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // fgMorphBlockStmt: morph a single statement in a block. // // Arguments: -// block - block containing the statement -// stmt - statement to morph -// msg - string to identify caller in a dump +// block - block containing the statement +// stmt - statement to morph +// msg - string to identify caller in a dump +// invalidateDFSTreeOnFGChange - whether or not the DFS tree should be invalidated +// by this function if it makes a flow graph change // // Returns: // true if 'stmt' was removed from the block. @@ -12851,7 +12853,9 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // Notes: // Can be called anytime, unlike fgMorphStmts() which should only be called once. 
// -bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)) +bool Compiler::fgMorphBlockStmt(BasicBlock* block, + Statement* stmt DEBUGARG(const char* msg), + bool invalidateDFSTreeOnFGChange) { assert(block != nullptr); assert(stmt != nullptr); @@ -12903,7 +12907,11 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(cons if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) { FoldResult const fr = fgFoldConditional(block); - removedStmt = (fr == FoldResult::FOLD_REMOVED_LAST_STMT); + if (invalidateDFSTreeOnFGChange && (fr != FoldResult::FOLD_DID_NOTHING)) + { + fgInvalidateDfsTree(); + } + removedStmt = (fr == FoldResult::FOLD_REMOVED_LAST_STMT); } if (!removedStmt) @@ -12943,6 +12951,11 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(cons { // Convert block to a throw bb fgConvertBBToThrowBB(block); + + if (invalidateDFSTreeOnFGChange) + { + fgInvalidateDfsTree(); + } } #ifdef DEBUG diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index c7fb0fe6f4fb7..29c2e610ef125 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -17,6 +17,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif #include "optcse.h" +#include "ssabuilder.h" #ifdef DEBUG #define RLDUMP(...) \ @@ -4835,26 +4836,8 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) // // Later we will unmark any nested CSE's for the CSE uses. // - // If there's just a single def for the CSE, we'll put this - // CSE into SSA form on the fly. We won't need any PHIs. 
- // - unsigned cseSsaNum = SsaConfig::RESERVED_SSA_NUM; - LclSsaVarDsc* ssaVarDsc = nullptr; - - if (dsc->csdDefCount == 1) - { - JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, cseLclVarNum); - lclDsc->lvInSsa = true; - // Allocate the ssa num - CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); - cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); - ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); - } - else - { - INDEBUG(lclDsc->lvIsMultiDefCSE = 1); - } + INDEBUG(lclDsc->lvIsMultiDefCSE = dsc->csdDefCount > 1); // Verify that all of the ValueNumbers in this list are correct as // Morph will change them when it performs a mutating operation. @@ -5037,18 +5020,9 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) // // Create a reference to the CSE temp - GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + GenTreeLclVar* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } - cse = cseLclVar; if (isSharedConst) { @@ -5208,42 +5182,16 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) store->gtVNPair = ValueNumStore::VNPForVoid(); // The store node itself is $VN.Void. noway_assert(store->OperIs(GT_STORE_LCL_VAR)); - // Backpatch the SSA def, if we're putting this CSE temp into ssa. - store->AsLclVar()->SetSsaNum(cseSsaNum); - // Move the information about the CSE def to the store; it now indicates a completed // CSE def instead of just a candidate. optCSE_canSwap uses this information to reason // about evaluation order in between substitutions of CSE defs/uses. 
store->gtCSEnum = exp->gtCSEnum; exp->gtCSEnum = NO_CSE; - if (cseSsaNum != SsaConfig::RESERVED_SSA_NUM) - { - LclSsaVarDsc* ssaVarDsc = m_pCompiler->lvaTable[cseLclVarNum].GetPerSsaData(cseSsaNum); - - // These should not have been set yet, since this is the first and - // only def for this CSE. - assert(ssaVarDsc->GetBlock() == nullptr); - assert(ssaVarDsc->GetDefNode() == nullptr); - - ssaVarDsc->m_vnPair = val->gtVNPair; - ssaVarDsc->SetBlock(blk); - ssaVarDsc->SetDefNode(store->AsLclVarCommon()); - } - /* Create a reference to the CSE temp */ GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } - GenTree* cseUse = cseLclVar; if (isSharedConst) { @@ -5303,6 +5251,33 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) m_pCompiler->gtUpdateStmtSideEffects(stmt); } while (lst != nullptr); + + ArrayStack defs(m_pCompiler->getAllocator(CMK_CSE)); + ArrayStack uses(m_pCompiler->getAllocator(CMK_CSE)); + + lst = dsc->csdTreeList; + do + { + Statement* lstStmt = lst->tslStmt; + for (GenTree* tree : lstStmt->TreeList()) + { + if (tree->OperIs(GT_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == cseLclVarNum)) + { + uses.Push(UseDefLocation(lst->tslBlock, lstStmt, tree->AsLclVar())); + } + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == cseLclVarNum)) + { + defs.Push(UseDefLocation(lst->tslBlock, lstStmt, tree->AsLclVar())); + } + } + + do + { + lst = lst->tslNext; + } while ((lst != nullptr) && (lst->tslStmt == lstStmt)); + } while (lst != nullptr); + + SsaBuilder::InsertInSsa(m_pCompiler, cseLclVarNum, defs, uses); } void CSE_Heuristic::AdjustHeuristic(CSE_Candidate* successfulCandidate) diff --git 
a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index bd98272957718..56947263f8628 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -958,7 +958,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) JITDUMP("\nRedundant branch opt in " FMT_BB ":\n", block->bbNum); - fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__)); + fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__), /* invalidateDFSTreeOnFGChange */ false); Metrics.RedundantBranchesEliminated++; return true; } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index a0b018b3078b4..40d9562e30249 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -86,192 +86,10 @@ void Compiler::fgResetForSsa() SsaBuilder::SsaBuilder(Compiler* pCompiler) : m_pCompiler(pCompiler) , m_allocator(pCompiler->getAllocator(CMK_SSA)) - , m_visitedTraits(0, pCompiler) // at this point we do not know the size, SetupBBRoot can add a block , m_renameStack(m_allocator, pCompiler->lvaCount) { } -//------------------------------------------------------------------------ -// ComputeDominanceFrontiers: Compute flow graph dominance frontiers -// -// Arguments: -// postOrder - an array containing all flow graph blocks -// count - the number of blocks in the postOrder array -// mapDF - a caller provided hashtable that will be populated -// with blocks and their dominance frontiers (only those -// blocks that have non-empty frontiers will be included) -// -// Notes: -// Recall that the dominance frontier of a block B is the set of blocks -// B3 such that there exists some B2 s.t. B3 is a successor of B2, and -// B dominates B2. Note that this dominance need not be strict -- B2 -// and B may be the same node. -// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy. 
-// -void SsaBuilder::ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF) -{ - DBG_SSA_JITDUMP("Computing DF:\n"); - - for (int i = 0; i < count; ++i) - { - BasicBlock* block = postOrder[i]; - - DBG_SSA_JITDUMP("Considering block " FMT_BB ".\n", block->bbNum); - - // Recall that B3 is in the dom frontier of B1 if there exists a B2 - // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor - // of B2. (Note that B1 might be the same block as B2.) - // In that definition, we're considering "block" to be B3, and trying - // to find B1's. To do so, first we consider the predecessors of "block", - // searching for candidate B2's -- "block" is obviously an immediate successor - // of its immediate predecessors. If there are zero or one preds, then there - // is no pred, or else the single pred dominates "block", so no B2 exists. - - FlowEdge* blockPreds = m_pCompiler->BlockPredsWithEH(block); - - // If block has 0/1 predecessor, skip, apart from handler entry blocks - // that are always in the dominance frontier of its enclosed blocks. - if (!m_pCompiler->bbIsHandlerBeg(block) && - ((blockPreds == nullptr) || (blockPreds->getNextPredEdge() == nullptr))) - { - DBG_SSA_JITDUMP(" Has %d preds; skipping.\n", blockPreds == nullptr ? 0 : 1); - continue; - } - - // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition -- - // *unless* it dominates "block"/B3. - - FlowGraphDfsTree* dfsTree = m_pCompiler->m_dfsTree; - FlowGraphDominatorTree* domTree = m_pCompiler->m_domTree; - - for (FlowEdge* pred = blockPreds; pred != nullptr; pred = pred->getNextPredEdge()) - { - BasicBlock* predBlock = pred->getSourceBlock(); - DBG_SSA_JITDUMP(" Considering predecessor " FMT_BB ".\n", predBlock->bbNum); - - if (!dfsTree->Contains(predBlock)) - { - DBG_SSA_JITDUMP(" Unreachable node\n"); - continue; - } - - // If we've found a B2, then consider the possible B1's. 
We start with - // B2, since a block dominates itself, then traverse upwards in the dominator - // tree, stopping when we reach the root, or the immediate dominator of "block"/B3. - // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3: - // a predecessor must be dominated by B3's immediate dominator.) - // Along this way, make "block"/B3 part of the dom frontier of the B1. - // When we reach this immediate dominator, the definition no longer applies, since this - // potential B1 *does* dominate "block"/B3, so we stop. - for (BasicBlock* b1 = predBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop - b1 = b1->bbIDom) - { - DBG_SSA_JITDUMP(" Adding " FMT_BB " to dom frontier of pred dom " FMT_BB ".\n", block->bbNum, - b1->bbNum); - - BlkVector& b1DF = *mapDF->Emplace(b1, m_allocator); - // It's possible to encounter the same DF multiple times, ensure that we don't add duplicates. - if (b1DF.empty() || (b1DF.back() != block)) - { - b1DF.push_back(block); - } - } - } - } - -#ifdef DEBUG - if (m_pCompiler->verboseSsa) - { - printf("\nComputed DF:\n"); - for (int i = 0; i < count; ++i) - { - BasicBlock* b = postOrder[i]; - printf("Block " FMT_BB " := {", b->bbNum); - - BlkVector* bDF = mapDF->LookupPointer(b); - if (bDF != nullptr) - { - int index = 0; - for (BasicBlock* f : *bDF) - { - printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); - } - } - printf("}\n"); - } - } -#endif -} - -//------------------------------------------------------------------------ -// ComputeIteratedDominanceFrontier: Compute the iterated dominance frontier -// for the specified block. 
-// -// Arguments: -// b - the block to computed the frontier for -// mapDF - a map containing the dominance frontiers of all blocks -// bIDF - a caller provided vector where the IDF is to be stored -// -// Notes: -// The iterated dominance frontier is formed by a closure operation: -// the IDF of B is the smallest set that includes B's dominance frontier, -// and also includes the dominance frontier of all elements of the set. -// -void SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkVectorMap* mapDF, BlkVector* bIDF) -{ - assert(bIDF->empty()); - - BlkVector* bDF = mapDF->LookupPointer(b); - - if (bDF != nullptr) - { - // Compute IDF(b) - start by adding DF(b) to IDF(b). - bIDF->reserve(bDF->size()); - BitVecOps::ClearD(&m_visitedTraits, m_visited); - - for (BasicBlock* f : *bDF) - { - BitVecOps::AddElemD(&m_visitedTraits, m_visited, f->bbPostorderNum); - bIDF->push_back(f); - } - - // Now for each block f from IDF(b) add DF(f) to IDF(b). This may result in new - // blocks being added to IDF(b) and the process repeats until no more new blocks - // are added. Note that since we keep adding to bIDF we can't use iterators as - // they may get invalidated. This happens to be a convenient way to avoid having - // to track newly added blocks in a separate set. - for (size_t newIndex = 0; newIndex < bIDF->size(); newIndex++) - { - BasicBlock* f = (*bIDF)[newIndex]; - BlkVector* fDF = mapDF->LookupPointer(f); - - if (fDF != nullptr) - { - for (BasicBlock* ff : *fDF) - { - if (BitVecOps::TryAddElemD(&m_visitedTraits, m_visited, ff->bbPostorderNum)) - { - bIDF->push_back(ff); - } - } - } - } - } - -#ifdef DEBUG - if (m_pCompiler->verboseSsa) - { - printf("IDF(" FMT_BB ") := {", b->bbNum); - int index = 0; - for (BasicBlock* f : *bIDF) - { - printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); - } - printf("}\n"); - } -#endif -} - /** * Returns the phi GT_PHI node if the variable already has a phi node. 
* @@ -280,7 +98,7 @@ void SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkV * * @return If there is a phi node for the lclNum, returns the GT_PHI tree, else NULL. */ -static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) +static Statement* GetPhiNode(BasicBlock* block, unsigned lclNum) { // Walk the statements for phi nodes. for (Statement* const stmt : block->Statements()) @@ -295,7 +113,7 @@ static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) GenTree* tree = stmt->GetRootNode(); if (tree->AsLclVar()->GetLclNum() == lclNum) { - return tree->AsLclVar()->Data(); + return stmt; } } @@ -309,19 +127,22 @@ static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) // block - The block where to insert the statement // lclNum - The variable number // -void SsaBuilder::InsertPhi(BasicBlock* block, unsigned lclNum) +// Returns: +// Inserted phi definition. +// +Statement* SsaBuilder::InsertPhi(Compiler* comp, BasicBlock* block, unsigned lclNum) { - var_types type = m_pCompiler->lvaGetDesc(lclNum)->TypeGet(); + var_types type = comp->lvaGetDesc(lclNum)->TypeGet(); // PHIs and all the associated nodes do not generate any code so the costs are always 0 - GenTree* phi = new (m_pCompiler, GT_PHI) GenTreePhi(type); + GenTree* phi = new (comp, GT_PHI) GenTreePhi(type); phi->SetCosts(0, 0); - GenTree* store = m_pCompiler->gtNewStoreLclVarNode(lclNum, phi); + GenTreeLclVar* store = comp->gtNewStoreLclVarNode(lclNum, phi); store->SetCosts(0, 0); store->gtType = type; // TODO-ASG-Cleanup: delete. This quirk avoided diffs from costing-induced tail dup. // Create the statement and chain everything in linear order - PHI, STORE_LCL_VAR. 
- Statement* stmt = m_pCompiler->gtNewStmt(store); + Statement* stmt = comp->gtNewStmt(store); stmt->SetTreeList(phi); phi->gtNext = store; store->gtPrev = phi; @@ -334,9 +155,10 @@ void SsaBuilder::InsertPhi(BasicBlock* block, unsigned lclNum) } #endif // DEBUG - m_pCompiler->fgInsertStmtAtBeg(block, stmt); + comp->fgInsertStmtAtBeg(block, stmt); JITDUMP("Added PHI definition for V%02u at start of " FMT_BB ".\n", lclNum, block->bbNum); + return stmt; } //------------------------------------------------------------------------ @@ -378,16 +200,39 @@ void SsaBuilder::AddPhiArg( } // Didn't find a match, add a new phi arg - // - var_types type = m_pCompiler->lvaGetDesc(lclNum)->TypeGet(); + AddNewPhiArg(m_pCompiler, block, stmt, phi, lclNum, ssaNum, pred); +} + +//------------------------------------------------------------------------ +// AddNewPhiArg: Do the IR manipulations to add a new phi arg to a GenTreePhi +// node. +// +// Arguments: +// comp - Compiler instance +// block - Block containing the phi node +// stmt - The statement that contains the GT_PHI node +// phi - The phi node +// lclNum - The local +// ssaNum - SSA number of the phi arg +// pred - The predecessor block corresponding to the phi arg +// +void SsaBuilder::AddNewPhiArg(Compiler* comp, + BasicBlock* block, + Statement* stmt, + GenTreePhi* phi, + unsigned lclNum, + unsigned ssaNum, + BasicBlock* pred) +{ + var_types type = comp->lvaGetDesc(lclNum)->TypeGet(); - GenTree* phiArg = new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(type, lclNum, ssaNum, pred); + GenTree* phiArg = new (comp, GT_PHI_ARG) GenTreePhiArg(type, lclNum, ssaNum, pred); // Costs are not relevant for PHI args. phiArg->SetCosts(0, 0); // The argument order doesn't matter so just insert at the front of the list because // it's easier. It's also easier to insert in linear order since the first argument // will be first in linear order as well. 
- phi->gtUses = new (m_pCompiler, CMK_ASTNode) GenTreePhi::Use(phiArg, phi->gtUses); + phi->gtUses = new (comp, CMK_ASTNode) GenTreePhi::Use(phiArg, phi->gtUses); GenTree* head = stmt->GetTreeList(); assert(head->OperIs(GT_PHI, GT_PHI_ARG)); @@ -395,7 +240,7 @@ void SsaBuilder::AddPhiArg( phiArg->gtNext = head; head->gtPrev = phiArg; - LclVarDsc* const varDsc = m_pCompiler->lvaGetDesc(lclNum); + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); LclSsaVarDsc* const ssaDesc = varDsc->GetPerSsaData(ssaNum); ssaDesc->AddPhiUse(block); @@ -427,10 +272,11 @@ void SsaBuilder::InsertPhiFunctions() unsigned count = dfsTree->GetPostOrderCount(); // Compute dominance frontier. - BlkToBlkVectorMap mapDF(m_allocator); - ComputeDominanceFrontiers(postOrder, count, &mapDF); + m_pCompiler->m_domFrontiers = FlowGraphDominanceFrontiers::Build(m_pCompiler->m_domTree); EndPhase(PHASE_BUILD_SSA_DF); + DBEXEC(m_pCompiler->verboseSsa, m_pCompiler->m_domTree->Dump()); + // Use the same IDF vector for all blocks to avoid unnecessary memory allocations BlkVector blockIDF(m_allocator); @@ -442,7 +288,20 @@ void SsaBuilder::InsertPhiFunctions() DBG_SSA_JITDUMP("Considering dominance frontier of block " FMT_BB ":\n", block->bbNum); blockIDF.clear(); - ComputeIteratedDominanceFrontier(block, &mapDF, &blockIDF); + m_pCompiler->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &blockIDF); + +#ifdef DEBUG + if (m_pCompiler->verboseSsa) + { + printf("IDF(" FMT_BB ") := {", block->bbNum); + int index = 0; + for (BasicBlock* f : blockIDF) + { + printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); + } + printf("}\n"); + } +#endif if (blockIDF.empty()) { @@ -480,7 +339,7 @@ void SsaBuilder::InsertPhiFunctions() { // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of // j. So insert a phi node at l. 
- InsertPhi(bbInDomFront, lclNum);
+ InsertPhi(m_pCompiler, bbInDomFront, lclNum);
 }
 }
 }
@@ -1300,9 +1159,6 @@ void SsaBuilder::Build()
 {
 JITDUMP("*************** In SsaBuilder::Build()\n");
- m_visitedTraits = m_pCompiler->m_dfsTree->PostOrderTraits();
- m_visited = BitVecOps::MakeEmpty(&m_visitedTraits);
-
 // Compute liveness on the graph.
 m_pCompiler->fgLocalVarLiveness();
 EndPhase(PHASE_BUILD_SSA_LIVENESS);
@@ -1505,3 +1361,499 @@ void Compiler::JitTestCheckSSA()
 }
 }
 #endif // DEBUG
+
+class IncrementalLiveInBuilder
+{
+ Compiler* m_comp;
+ ArrayStack<BasicBlock*> m_queue;
+
+public:
+ IncrementalLiveInBuilder(Compiler* comp)
+ : m_comp(comp)
+ , m_queue(comp->getAllocator(CMK_SSA))
+ {
+ }
+
+ void MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef);
+};
+
+//------------------------------------------------------------------------
+// MarkLiveInBackwards: Given a use and its reaching definition, mark that
+// local as live-in into all blocks on the path from the reaching definition to
+// the use. 
+//
+// Parameters:
+// lclNum - The local
+// use - The use
+// reachingDef - The reaching definition of the use
+//
+void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum,
+ const UseDefLocation& use,
+ const UseDefLocation& reachingDef)
+{
+ if (use.Block == reachingDef.Block)
+ {
+ // No work to be done
+ return;
+ }
+
+ if (!m_comp->AddInsertedSsaLiveIn(use.Block, lclNum))
+ {
+ // We've already marked this block as live-in before -- no need to
+ // repeat that work (everyone should agree on reaching defs)
+ return;
+ }
+
+ m_queue.Reset();
+ m_queue.Push(use.Block);
+
+ while (!m_queue.Empty())
+ {
+ BasicBlock* block = m_queue.Pop();
+
+ for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge())
+ {
+ BasicBlock* pred = edge->getSourceBlock();
+ if (pred == reachingDef.Block)
+ {
+ continue;
+ }
+
+ if (m_comp->AddInsertedSsaLiveIn(pred, lclNum))
+ {
+ m_queue.Push(pred);
+ }
+ }
+ }
+}
+
+class IncrementalSsaBuilder
+{
+ Compiler* m_comp;
+ unsigned m_lclNum;
+ ArrayStack<UseDefLocation>& m_defs;
+ ArrayStack<UseDefLocation>& m_uses;
+ BitVecTraits m_poTraits;
+ BitVec m_defBlocks;
+ BitVec m_iteratedDominanceFrontiers;
+ IncrementalLiveInBuilder m_liveInBuilder;
+
+ UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use);
+ bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def);
+ bool FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def);
+ Statement* LatestStatement(Statement* stmt1, Statement* stmt2);
+public:
+ IncrementalSsaBuilder(Compiler* comp,
+ unsigned lclNum,
+ ArrayStack<UseDefLocation>& defs,
+ ArrayStack<UseDefLocation>& uses)
+ : m_comp(comp)
+ , m_lclNum(lclNum)
+ , m_defs(defs)
+ , m_uses(uses)
+ , m_poTraits(comp->m_dfsTree->PostOrderTraits())
+ , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits))
+ , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits))
+ , m_liveInBuilder(comp)
+ {
+ }
+
+ bool Insert();
+ static void MarkLiveInBackwards(Compiler* comp,
+ unsigned lclNum,
+ const UseDefLocation& use,
+ const UseDefLocation& reachingDef,
+ BitVec& visitedSet);
+};
+
+//------------------------------------------------------------------------
+// FindOrCreateReachingDef: Given a use indicated by a block and potentially a
+// statement and tree, find the reaching definition for it, potentially
+// creating it if the reaching definition is a phi that has not been created
+// yet.
+//
+// Parameters:
+// use - The use. The block must be non-null. The statement and tree can be
+// null, meaning that the use is happening after the last statement in the
+// block.
+//
+// Returns:
+// Location of a definition node that is the reaching def.
+//
+UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use)
+{
+ for (BasicBlock* dom = use.Block; dom != nullptr; dom = dom->bbIDom)
+ {
+ UseDefLocation reachingDef;
+ if (BitVecOps::IsMember(&m_poTraits, m_defBlocks, dom->bbPostorderNum) &&
+ FindReachingDefInBlock(use, dom, &reachingDef))
+ {
+ return reachingDef;
+ }
+
+ if (BitVecOps::IsMember(&m_poTraits, m_iteratedDominanceFrontiers, dom->bbPostorderNum))
+ {
+ Statement* phiDef = GetPhiNode(dom, m_lclNum);
+ if (phiDef == nullptr)
+ {
+ phiDef = SsaBuilder::InsertPhi(m_comp, dom, m_lclNum);
+
+ LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum);
+ unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), dom,
+ phiDef->GetRootNode()->AsLclVarCommon());
+ phiDef->GetRootNode()->AsLclVar()->SetSsaNum(ssaNum);
+
+ GenTreePhi* phi = phiDef->GetRootNode()->AsLclVar()->Data()->AsPhi();
+
+ // The local is always live into blocks with phi defs. 
+ bool marked = m_comp->AddInsertedSsaLiveIn(dom, m_lclNum); + assert(marked); + + for (FlowEdge* predEdge = m_comp->BlockPredsWithEH(dom); predEdge != nullptr; + predEdge = predEdge->getNextPredEdge()) + { + BasicBlock* pred = predEdge->getSourceBlock(); + if (!m_comp->m_dfsTree->Contains(pred)) + { + continue; + } + + UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); + UseDefLocation phiArgReachingDef = FindOrCreateReachingDef(phiArgUse); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), + pred); + + // The phi arg is modelled at the end of the pred block; + // mark liveness for it. + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, phiArgUse, phiArgReachingDef); + } + + m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); + + JITDUMP(" New phi def:\n"); + DISPSTMT(phiDef); + } + + return UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); + } + } + + assert(!"Found use without any def"); + unreached(); +} + +//------------------------------------------------------------------------ +// FindReachingDefInBlock: Given a use, try to find a definition in the +// specified block. +// +// Parameters: +// use - The use. The block must be non-null. The statement and tree can be +// null, meaning that the use is happening after the last statement in the +// block. +// block - The block to look for a definition in. +// def - [out] The found definition, if any. +// +// Returns: +// True if a reaching definition was found in "block". +// +// Remarks: +// If the use occurs in "block", then this function takes care to find the +// latest definition before the use. 
+// +bool IncrementalSsaBuilder::FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def) +{ + Statement* latestDefStmt = nullptr; + GenTreeLclVar* latestTree = nullptr; + + for (int i = 0; i < m_defs.Height(); i++) + { + UseDefLocation& candidate = m_defs.BottomRef(i); + if (candidate.Block != block) + { + continue; + } + + if (candidate.Stmt == use.Stmt) + { + if (FindReachingDefInSameStatement(use, def)) + { + return true; + } + + continue; + } + + if ((candidate.Block == use.Block) && (use.Stmt != nullptr) && + (LatestStatement(use.Stmt, candidate.Stmt) != use.Stmt)) + { + // Def is after use + continue; + } + + if (candidate.Stmt == latestDefStmt) + { + latestTree = nullptr; + } + else if ((latestDefStmt == nullptr) || (LatestStatement(candidate.Stmt, latestDefStmt) == candidate.Stmt)) + { + latestDefStmt = candidate.Stmt; + latestTree = candidate.Tree; + } + } + + if (latestDefStmt == nullptr) + { + return false; + } + + if (latestTree == nullptr) + { + for (GenTree* tree : latestDefStmt->TreeList()) + { + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == m_lclNum)) + { + latestTree = tree->AsLclVar(); + } + } + + assert(latestTree != nullptr); + } + + *def = UseDefLocation(use.Block, latestDefStmt, latestTree); + return true; +} + +//------------------------------------------------------------------------ +// FindReachingDefInSameStatement: Given a use, try to find a definition within +// the same statement as that use. +// +// Parameters: +// use - The use. +// def - [out] The found definition, if any. +// +// Returns: +// True if a reaching definition was found in the same statement as the use. 
+// +bool IncrementalSsaBuilder::FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def) +{ + for (GenTree* tree = use.Tree->gtPrev; tree != nullptr; tree = tree->gtPrev) + { + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == m_lclNum)) + { + *def = UseDefLocation(use.Block, use.Stmt, tree->AsLclVar()); + return true; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LatestStatement: Given two statements in the same block, find the latest one +// of them. +// +// Parameters: +// stmt1 - The first statement +// stmt2 - The second statement +// +// Returns: +// Latest of the two statements. +// +Statement* IncrementalSsaBuilder::LatestStatement(Statement* stmt1, Statement* stmt2) +{ + if (stmt1 == stmt2) + { + return stmt1; + } + + Statement* cursor1 = stmt1->GetNextStmt(); + Statement* cursor2 = stmt2->GetNextStmt(); + + while (true) + { + if ((cursor1 == stmt2) || (cursor2 == nullptr)) + { + return stmt2; + } + + if ((cursor2 == stmt1) || (cursor1 == nullptr)) + { + return stmt1; + } + + cursor1 = cursor1->GetNextStmt(); + cursor2 = cursor2->GetNextStmt(); + } +} + +//------------------------------------------------------------------------ +// Insert: Insert the uses and definitions in SSA. +// +// Returns: +// True if we were able to insert the local into SSA. False if we gave up +// (due to hitting internal limits). +// +bool IncrementalSsaBuilder::Insert() +{ + FlowGraphDfsTree* dfsTree = m_comp->m_dfsTree; + + // Compute iterated dominance frontiers of all real definitions. These are + // the blocks that unpruned phi definitions would be inserted into. We + // insert the phis lazily to end up with pruned SSA, but we still need to + // know which blocks are candidates for phis. 
+ BlkVector idf(m_comp->getAllocator(CMK_SSA)); + + for (int i = 0; i < m_defs.Height(); i++) + { + BasicBlock* block = m_defs.BottomRef(i).Block; + idf.clear(); + m_comp->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &idf); + + for (BasicBlock* idfBlock : idf) + { + BitVecOps::AddElemD(&m_poTraits, m_iteratedDominanceFrontiers, idfBlock->bbPostorderNum); + } + } + + // The IDF gives a bound on the potential recursion depth of + // FindOrCreateReachingDef. Limit this to a value we know won't stack + // overflow. + if (BitVecOps::Count(&m_poTraits, m_iteratedDominanceFrontiers) > 100) + { + return false; + } + + LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); + // Alloc SSA numbers for all real definitions. + for (int i = 0; i < m_defs.Height(); i++) + { + UseDefLocation& def = m_defs.BottomRef(i); + if (!dfsTree->Contains(def.Block)) + { + continue; + } + + BitVecOps::AddElemD(&m_poTraits, m_defBlocks, def.Block->bbPostorderNum); + + unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), def.Block, def.Tree); + def.Tree->SetSsaNum(ssaNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); + ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + JITDUMP(" [%06u] d:%u\n", Compiler::dspTreeID(def.Tree), ssaNum); + } + + // Finally compute all the reaching defs for the uses. 
+ for (int i = 0; i < m_uses.Height(); i++) + { + UseDefLocation& use = m_uses.BottomRef(i); + if (!dfsTree->Contains(use.Block)) + { + continue; + } + + UseDefLocation def = FindOrCreateReachingDef(use); + use.Tree->SetSsaNum(def.Tree->GetSsaNum()); + dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); + JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); + + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, use, def); + } + + return true; +} + +//------------------------------------------------------------------------ +// InsertInSsa: Insert a specified local in SSA given its local number and all +// of its definitions and uses in the IR. +// +// Parameters: +// comp - Compiler instance +// lclNum - The local that is being inserted into SSA +// defs - All STORE_LCL_VAR definitions of the local +// uses - All LCL_VAR uses of the local +// +// Returns: +// True if we were able to insert the local into SSA. False if we gave up +// (due to hitting internal limits). +// +// Remarks: +// All uses are required to never read an uninitialized value of the local. +// That is, this function requires that all paths through the function go +// through one of the defs in "defs" before any use in "uses". 
+//
+bool SsaBuilder::InsertInSsa(Compiler* comp,
+ unsigned lclNum,
+ ArrayStack<UseDefLocation>& defs,
+ ArrayStack<UseDefLocation>& uses)
+{
+ LclVarDsc* dsc = comp->lvaGetDesc(lclNum);
+ assert(!dsc->lvInSsa);
+
+ JITDUMP("Putting V%02u into SSA form\n", lclNum);
+ JITDUMP(" %d defs:", defs.Height());
+ for (int i = 0; i < defs.Height(); i++)
+ {
+ JITDUMP(" [%06u]", Compiler::dspTreeID(defs.Bottom(i).Tree));
+ }
+
+ JITDUMP("\n %d uses:", uses.Height());
+ for (int i = 0; i < uses.Height(); i++)
+ {
+ JITDUMP(" [%06u]", Compiler::dspTreeID(uses.Bottom(i).Tree));
+ }
+
+ JITDUMP("\n");
+
+ if (defs.Height() == 1)
+ {
+ JITDUMP(" Single-def local; putting into SSA directly\n");
+
+ UseDefLocation& def = defs.BottomRef(0);
+
+ unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(comp->getAllocator(CMK_SSA), def.Block, def.Tree);
+ def.Tree->SetSsaNum(ssaNum);
+ JITDUMP(" [%06u] d:%u\n", Compiler::dspTreeID(def.Tree), ssaNum);
+
+ LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum);
+ ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair;
+
+ IncrementalLiveInBuilder liveIn(comp);
+
+ for (int i = 0; i < uses.Height(); i++)
+ {
+ UseDefLocation& use = uses.BottomRef(i);
+ use.Tree->SetSsaNum(ssaNum);
+ ssaDsc->AddUse(use.Block);
+ JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum);
+
+ liveIn.MarkLiveInBackwards(lclNum, use, def);
+ }
+
+ dsc->lvInSsa = true;
+ return true;
+ }
+
+ if (comp->m_dfsTree == nullptr)
+ {
+ comp->m_dfsTree = comp->fgComputeDfs();
+ }
+
+ if (comp->m_domTree == nullptr)
+ {
+ comp->m_domTree = FlowGraphDominatorTree::Build(comp->m_dfsTree);
+ }
+
+ if (comp->m_domFrontiers == nullptr)
+ {
+ comp->m_domFrontiers = FlowGraphDominanceFrontiers::Build(comp->m_domTree);
+ }
+
+ IncrementalSsaBuilder builder(comp, lclNum, defs, uses);
+ if (builder.Insert())
+ {
+ dsc->lvInSsa = true;
+ return true;
+ }
+
+ return false;
+}
diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h
index 014edb94f1e01..446f7894e1da4 100644
--- a/src/coreclr/jit/ssabuilder.h
+++ 
b/src/coreclr/jit/ssabuilder.h
@@ -12,8 +12,28 @@ typedef int LclVarNum;
 // Pair of a local var name eg: V01 and Ssa number; eg: V01_01
 typedef std::pair<LclVarNum, unsigned> SsaVarName;
+struct UseDefLocation
+{
+ BasicBlock* Block = nullptr;
+ Statement* Stmt = nullptr;
+ GenTreeLclVar* Tree = nullptr;
+
+ UseDefLocation()
+ {
+ }
+
+ UseDefLocation(BasicBlock* block, Statement* stmt, GenTreeLclVar* tree)
+ : Block(block)
+ , Stmt(stmt)
+ , Tree(tree)
+ {
+ }
+};
+
 class SsaBuilder
 {
+ friend class IncrementalSsaBuilder;
+
 private:
 inline void EndPhase(Phases phase)
 {
@@ -33,20 +53,26 @@ class SsaBuilder
 // variable are stored in the "per SSA data" on the local descriptor.
 void Build();
+ static bool InsertInSsa(Compiler* comp,
+ unsigned lclNum,
+ ArrayStack<UseDefLocation>& defs,
+ ArrayStack<UseDefLocation>& uses);
 private:
- // Compute flow graph dominance frontiers.
- void ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF);
-
- // Compute the iterated dominance frontier for the specified block.
- void ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkVectorMap* mapDF, BlkVector* bIDF);
-
 // Insert a new GT_PHI statement.
- void InsertPhi(BasicBlock* block, unsigned lclNum);
+ static Statement* InsertPhi(Compiler* comp, BasicBlock* block, unsigned lclNum);
 
 // Add a new GT_PHI_ARG node to an existing GT_PHI node
 void AddPhiArg(
 BasicBlock* block, Statement* stmt, GenTreePhi* phi, unsigned lclNum, unsigned ssaNum, BasicBlock* pred);
 
+ static void AddNewPhiArg(Compiler* comp,
+ BasicBlock* block,
+ Statement* stmt,
+ GenTreePhi* phi,
+ unsigned lclNum,
+ unsigned ssaNum,
+ BasicBlock* pred);
+
 // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
 // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning
 // of basic blocks that require them.
@@ -79,12 +105,7 @@ class SsaBuilder
 // the handlers of a newly entered block based on one entering block. 
void AddPhiArgsToNewlyEnteredHandler(BasicBlock* predEnterBlock, BasicBlock* enterBlock, BasicBlock* handlerStart); - Compiler* m_pCompiler; - CompAllocator m_allocator; - - // Bit vector used by ComputeImmediateDom to track already visited blocks. - BitVecTraits m_visitedTraits; - BitVec m_visited; - + Compiler* m_pCompiler; + CompAllocator m_allocator; SsaRenameState m_renameStack; }; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d40e05e35a0e1..1fd0a2711ba70 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10909,6 +10909,7 @@ PhaseStatus Compiler::fgValueNumber() #endif // DEBUG fgVNPassesCompleted++; + vnState = nullptr; return PhaseStatus::MODIFIED_EVERYTHING; } @@ -11196,7 +11197,7 @@ void Compiler::fgValueNumberPhiDef(GenTreeLclVar* newSsaDef, BasicBlock* blk, bo for (GenTreePhi::Use& use : phiNode->Uses()) { GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); - if (!vnState->IsReachableThroughPred(blk, phiArg->gtPredBB)) + if ((vnState != nullptr) && !vnState->IsReachableThroughPred(blk, phiArg->gtPredBB)) { JITDUMP(" Phi arg [%06u] is unnecessary; path through pred " FMT_BB " cannot be taken\n", dspTreeID(phiArg), phiArg->gtPredBB->bbNum);