Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit b2c8f01

Browse files
committed
[DAG, X86] Improve Dependency analysis when doing multi-node
Instruction Selection. Clean up cycle/validity checks in ISel (IsLegalToFold, HandleMergeInputChains) and X86 (isFusableLoadOpStore). Now do a full search for cycles / dependencies, pruning the search when the topological property of NodeId allows. As part of this, propagate the NodeId-based cutoffs to narrow hasPredecessorHelper searches. Reviewers: craig.topper, bogner Subscribers: llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D41293 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324359 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6378379 commit b2c8f01

18 files changed

+645
-985
lines changed

include/llvm/CodeGen/SelectionDAGNodes.h

+30-6
Original file line numberDiff line numberDiff line change
@@ -796,16 +796,38 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
796796
/// searches to be performed in parallel, caching of results across
797797
/// queries and incremental addition to Worklist. Stops early if N is
798798
/// found but will resume. Remember to clear Visited and Worklists
799-
/// if DAG changes.
799+
/// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
800+
/// giving up. The TopologicalPrune flag signals that positive NodeIds are
801+
/// topologically ordered (Operands have strictly smaller node id) and search
802+
/// can be pruned leveraging this.
800803
static bool hasPredecessorHelper(const SDNode *N,
801804
SmallPtrSetImpl<const SDNode *> &Visited,
802805
SmallVectorImpl<const SDNode *> &Worklist,
803-
unsigned int MaxSteps = 0) {
806+
unsigned int MaxSteps = 0,
807+
bool TopologicalPrune = false) {
808+
SmallVector<const SDNode *, 8> DeferredNodes;
804809
if (Visited.count(N))
805810
return true;
811+
812+
// Node Id's are assigned in three places: As a topological
813+
// ordering (> 0), during legalization (results in values set to
814+
// 0), and new nodes (set to -1). If N has a topological id then we
815+
// know that all nodes with ids smaller than it cannot be
816+
// successors and we need not check them. Filter out all nodes
817+
// that can't be matches. We add them to the worklist before exit
818+
// in case of multiple calls.
819+
820+
int NId = N->getNodeId();
821+
822+
bool Found = false;
806823
while (!Worklist.empty()) {
807824
const SDNode *M = Worklist.pop_back_val();
808-
bool Found = false;
825+
int MId = M->getNodeId();
826+
if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
827+
(MId > 0) && (MId < NId)) {
828+
DeferredNodes.push_back(M);
829+
continue;
830+
}
809831
for (const SDValue &OpV : M->op_values()) {
810832
SDNode *Op = OpV.getNode();
811833
if (Visited.insert(Op).second)
@@ -814,11 +836,13 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
814836
Found = true;
815837
}
816838
if (Found)
817-
return true;
839+
break;
818840
if (MaxSteps != 0 && Visited.size() >= MaxSteps)
819-
return false;
841+
break;
820842
}
821-
return false;
843+
// Push deferred nodes back on worklist.
844+
Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
845+
return Found;
822846
}
823847

824848
/// Return true if all the users of N are contained in Nodes.

lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

+80-215
Original file line numberDiff line numberDiff line change
@@ -2137,54 +2137,44 @@ static SDNode *findGlueUse(SDNode *N) {
21372137
return nullptr;
21382138
}
21392139

2140-
/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
2141-
/// This function iteratively traverses up the operand chain, ignoring
2142-
/// certain nodes.
2143-
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
2144-
SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
2140+
/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path
2141+
/// beyond "ImmedUse". We may ignore chains as they are checked separately.
2142+
static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
21452143
bool IgnoreChains) {
2146-
// The NodeID's are given uniques ID's where a node ID is guaranteed to be
2147-
// greater than all of its (recursive) operands. If we scan to a point where
2148-
// 'use' is smaller than the node we're scanning for, then we know we will
2149-
// never find it.
2150-
//
2151-
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
2152-
// happen because we scan down to newly selected nodes in the case of glue
2153-
// uses.
2154-
std::vector<SDNode *> WorkList;
2155-
WorkList.push_back(Use);
2156-
2157-
while (!WorkList.empty()) {
2158-
Use = WorkList.back();
2159-
WorkList.pop_back();
2160-
// NodeId topological order of TokenFactors is not guaranteed. Do not skip.
2161-
if (Use->getOpcode() != ISD::TokenFactor &&
2162-
Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
2163-
continue;
2144+
SmallPtrSet<const SDNode *, 16> Visited;
2145+
SmallVector<const SDNode *, 16> WorkList;
2146+
// Only check if we have non-immediate uses of Def.
2147+
if (ImmedUse->isOnlyUserOf(Def))
2148+
return false;
21642149

2165-
// Don't revisit nodes if we already scanned it and didn't fail, we know we
2166-
// won't fail if we scan it again.
2167-
if (!Visited.insert(Use).second)
2150+
// We don't care about paths to Def that go through ImmedUse so mark it
2151+
// visited and mark non-def operands as used.
2152+
Visited.insert(ImmedUse);
2153+
for (const SDValue &Op : ImmedUse->op_values()) {
2154+
SDNode *N = Op.getNode();
2155+
// Ignore chain deps (they are validated by
2156+
// HandleMergeInputChains) and immediate uses
2157+
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
21682158
continue;
2159+
if (!Visited.insert(N).second)
2160+
continue;
2161+
WorkList.push_back(N);
2162+
}
21692163

2170-
for (const SDValue &Op : Use->op_values()) {
2171-
// Ignore chain uses, they are validated by HandleMergeInputChains.
2172-
if (Op.getValueType() == MVT::Other && IgnoreChains)
2173-
continue;
2174-
2164+
// Initialize worklist to operands of Root.
2165+
if (Root != ImmedUse) {
2166+
for (const SDValue &Op : Root->op_values()) {
21752167
SDNode *N = Op.getNode();
2176-
if (N == Def) {
2177-
if (Use == ImmedUse || Use == Root)
2178-
continue; // We are not looking for immediate use.
2179-
assert(N != Root);
2180-
return true;
2181-
}
2182-
2183-
// Traverse up the operand chain.
2168+
// Ignore chains (they are validated by HandleMergeInputChains)
2169+
if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
2170+
continue;
2171+
if (!Visited.insert(N).second)
2172+
continue;
21842173
WorkList.push_back(N);
21852174
}
21862175
}
2187-
return false;
2176+
2177+
return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
21882178
}
21892179

21902180
/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -2256,13 +2246,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
22562246

22572247
// If our query node has a glue result with a use, we've walked up it. If
22582248
// the user (which has already been selected) has a chain or indirectly uses
2259-
// the chain, our WalkChainUsers predicate will not consider it. Because of
2249+
// the chain, HandleMergeInputChains will not consider it. Because of
22602250
// this, we cannot ignore chains in this predicate.
22612251
IgnoreChains = false;
22622252
}
22632253

2264-
SmallPtrSet<SDNode*, 16> Visited;
2265-
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
2254+
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
22662255
}
22672256

22682257
void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -2381,143 +2370,6 @@ void SelectionDAGISel::UpdateChains(
23812370
DEBUG(dbgs() << "ISEL: Match complete!\n");
23822371
}
23832372

2384-
enum ChainResult {
2385-
CR_Simple,
2386-
CR_InducesCycle,
2387-
CR_LeadsToInteriorNode
2388-
};
2389-
2390-
/// WalkChainUsers - Walk down the users of the specified chained node that is
2391-
/// part of the pattern we're matching, looking at all of the users we find.
2392-
/// This determines whether something is an interior node, whether we have a
2393-
/// non-pattern node in between two pattern nodes (which prevent folding because
2394-
/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
2395-
/// between pattern nodes (in which case the TF becomes part of the pattern).
2396-
///
2397-
/// The walk we do here is guaranteed to be small because we quickly get down to
2398-
/// already selected nodes "below" us.
2399-
static ChainResult
2400-
WalkChainUsers(const SDNode *ChainedNode,
2401-
SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
2402-
DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
2403-
SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
2404-
ChainResult Result = CR_Simple;
2405-
2406-
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
2407-
E = ChainedNode->use_end(); UI != E; ++UI) {
2408-
// Make sure the use is of the chain, not some other value we produce.
2409-
if (UI.getUse().getValueType() != MVT::Other) continue;
2410-
2411-
SDNode *User = *UI;
2412-
2413-
if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
2414-
continue;
2415-
2416-
// If we see an already-selected machine node, then we've gone beyond the
2417-
// pattern that we're selecting down into the already selected chunk of the
2418-
// DAG.
2419-
unsigned UserOpcode = User->getOpcode();
2420-
if (User->isMachineOpcode() ||
2421-
UserOpcode == ISD::CopyToReg ||
2422-
UserOpcode == ISD::CopyFromReg ||
2423-
UserOpcode == ISD::INLINEASM ||
2424-
UserOpcode == ISD::EH_LABEL ||
2425-
UserOpcode == ISD::LIFETIME_START ||
2426-
UserOpcode == ISD::LIFETIME_END) {
2427-
// If their node ID got reset to -1 then they've already been selected.
2428-
// Treat them like a MachineOpcode.
2429-
if (User->getNodeId() == -1)
2430-
continue;
2431-
}
2432-
2433-
// If we have a TokenFactor, we handle it specially.
2434-
if (User->getOpcode() != ISD::TokenFactor) {
2435-
// If the node isn't a token factor and isn't part of our pattern, then it
2436-
// must be a random chained node in between two nodes we're selecting.
2437-
// This happens when we have something like:
2438-
// x = load ptr
2439-
// call
2440-
// y = x+4
2441-
// store y -> ptr
2442-
// Because we structurally match the load/store as a read/modify/write,
2443-
// but the call is chained between them. We cannot fold in this case
2444-
// because it would induce a cycle in the graph.
2445-
if (!std::count(ChainedNodesInPattern.begin(),
2446-
ChainedNodesInPattern.end(), User))
2447-
return CR_InducesCycle;
2448-
2449-
// Otherwise we found a node that is part of our pattern. For example in:
2450-
// x = load ptr
2451-
// y = x+4
2452-
// store y -> ptr
2453-
// This would happen when we're scanning down from the load and see the
2454-
// store as a user. Record that there is a use of ChainedNode that is
2455-
// part of the pattern and keep scanning uses.
2456-
Result = CR_LeadsToInteriorNode;
2457-
InteriorChainedNodes.push_back(User);
2458-
continue;
2459-
}
2460-
2461-
// If we found a TokenFactor, there are two cases to consider: first if the
2462-
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
2463-
// uses of the TF are in our pattern) we just want to ignore it. Second,
2464-
// the TokenFactor can be sandwiched in between two chained nodes, like so:
2465-
// [Load chain]
2466-
// ^
2467-
// |
2468-
// [Load]
2469-
// ^ ^
2470-
// | \ DAG's like cheese
2471-
// / \ do you?
2472-
// / |
2473-
// [TokenFactor] [Op]
2474-
// ^ ^
2475-
// | |
2476-
// \ /
2477-
// \ /
2478-
// [Store]
2479-
//
2480-
// In this case, the TokenFactor becomes part of our match and we rewrite it
2481-
// as a new TokenFactor.
2482-
//
2483-
// To distinguish these two cases, do a recursive walk down the uses.
2484-
auto MemoizeResult = TokenFactorResult.find(User);
2485-
bool Visited = MemoizeResult != TokenFactorResult.end();
2486-
// Recursively walk chain users only if the result is not memoized.
2487-
if (!Visited) {
2488-
auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
2489-
InteriorChainedNodes);
2490-
MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
2491-
}
2492-
switch (MemoizeResult->second) {
2493-
case CR_Simple:
2494-
// If the uses of the TokenFactor are just already-selected nodes, ignore
2495-
// it, it is "below" our pattern.
2496-
continue;
2497-
case CR_InducesCycle:
2498-
// If the uses of the TokenFactor lead to nodes that are not part of our
2499-
// pattern that are not selected, folding would turn this into a cycle,
2500-
// bail out now.
2501-
return CR_InducesCycle;
2502-
case CR_LeadsToInteriorNode:
2503-
break; // Otherwise, keep processing.
2504-
}
2505-
2506-
// Okay, we know we're in the interesting interior case. The TokenFactor
2507-
// is now going to be considered part of the pattern so that we rewrite its
2508-
// uses (it may have uses that are not part of the pattern) with the
2509-
// ultimate chain result of the generated code. We will also add its chain
2510-
// inputs as inputs to the ultimate TokenFactor we create.
2511-
Result = CR_LeadsToInteriorNode;
2512-
if (!Visited) {
2513-
ChainedNodesInPattern.push_back(User);
2514-
InteriorChainedNodes.push_back(User);
2515-
}
2516-
}
2517-
2518-
return Result;
2519-
}
2520-
25212373
/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
25222374
/// operation for when the pattern matched at least one node with a chain. The
25232375
/// input vector contains a list of all of the chained nodes that we match. We
@@ -2527,47 +2379,60 @@ WalkChainUsers(const SDNode *ChainedNode,
25272379
static SDValue
25282380
HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
25292381
SelectionDAG *CurDAG) {
2530-
// Used for memoization. Without it WalkChainUsers could take exponential
2531-
// time to run.
2532-
DenseMap<const SDNode *, ChainResult> TokenFactorResult;
2533-
// Walk all of the chained nodes we've matched, recursively scanning down the
2534-
// users of the chain result. This adds any TokenFactor nodes that are caught
2535-
// in between chained nodes to the chained and interior nodes list.
2536-
SmallVector<SDNode*, 3> InteriorChainedNodes;
2537-
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
2538-
if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
2539-
TokenFactorResult,
2540-
InteriorChainedNodes) == CR_InducesCycle)
2541-
return SDValue(); // Would induce a cycle.
2542-
}
25432382

2544-
// Okay, we have walked all the matched nodes and collected TokenFactor nodes
2545-
// that we are interested in. Form our input TokenFactor node.
2383+
SmallPtrSet<const SDNode *, 16> Visited;
2384+
SmallVector<const SDNode *, 8> Worklist;
25462385
SmallVector<SDValue, 3> InputChains;
2547-
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
2548-
// Add the input chain of this node to the InputChains list (which will be
2549-
// the operands of the generated TokenFactor) if it's not an interior node.
2550-
SDNode *N = ChainNodesMatched[i];
2551-
if (N->getOpcode() != ISD::TokenFactor) {
2552-
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
2553-
continue;
2386+
unsigned int Max = 8192;
25542387

2555-
// Otherwise, add the input chain.
2556-
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
2557-
assert(InChain.getValueType() == MVT::Other && "Not a chain");
2558-
InputChains.push_back(InChain);
2559-
continue;
2560-
}
2388+
// Quick exit on trivial merge.
2389+
if (ChainNodesMatched.size() == 1)
2390+
return ChainNodesMatched[0]->getOperand(0);
25612391

2562-
// If we have a token factor, we want to add all inputs of the token factor
2563-
// that are not part of the pattern we're matching.
2564-
for (const SDValue &Op : N->op_values()) {
2565-
if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
2566-
Op.getNode()))
2567-
InputChains.push_back(Op);
2568-
}
2392+
// Add chains that aren't already added (internal). Peek through
2393+
// token factors.
2394+
std::function<void(const SDValue)> AddChains = [&](const SDValue V) {
2395+
if (V.getValueType() != MVT::Other)
2396+
return;
2397+
if (V->getOpcode() == ISD::EntryToken)
2398+
return;
2399+
// Newly selected nodes (-1) are always added directly.
2400+
if (V->getNodeId() == -1)
2401+
InputChains.push_back(V);
2402+
else if (V->getOpcode() == ISD::TokenFactor) {
2403+
for (int i = 0, e = V->getNumOperands(); i != e; ++i)
2404+
AddChains(V->getOperand(i));
2405+
} else if (!Visited.count(V.getNode()))
2406+
InputChains.push_back(V);
2407+
};
2408+
2409+
for (auto *N : ChainNodesMatched) {
2410+
Worklist.push_back(N);
2411+
Visited.insert(N);
25692412
}
25702413

2414+
while (!Worklist.empty())
2415+
AddChains(Worklist.pop_back_val()->getOperand(0));
2416+
2417+
// Skip the search if there are no chain dependencies.
2418+
if (InputChains.size() == 0)
2419+
return CurDAG->getEntryNode();
2420+
2421+
// If one of these chains is a successor of input, we must have a
2422+
// node that is both the predecessor and successor of the
2423+
// to-be-merged nodes. Fail.
2424+
Visited.clear();
2425+
for (SDValue V : InputChains)
2426+
Worklist.push_back(V.getNode());
2427+
2428+
for (auto *N : ChainNodesMatched)
2429+
if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true))
2430+
return SDValue();
2431+
// Fail conservatively if we stopped searching early.
2432+
if (Visited.size() >= Max)
2433+
return SDValue();
2434+
2435+
// Return merged chain.
25712436
if (InputChains.size() == 1)
25722437
return InputChains[0];
25732438
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),

0 commit comments

Comments
 (0)