@@ -39,10 +39,15 @@ static cl::opt<unsigned> MaxClones(
3939 " The maximum number of clones allowed for a single function "
4040 " specialization" ));
4141
// Recursion bound for InstCostVisitor::discoverStronglyConnectedComponent():
// that walk returns without recording the phi once its Depth argument exceeds
// this value. Defaults to 10 and is hidden from the regular option listing.
42+ static cl::opt<unsigned > MaxDiscoveryDepth (
43+ " funcspec-max-discovery-depth" , cl::init(10 ), cl::Hidden,
44+ cl::desc(" The maximum recursion depth allowed when searching for strongly "
45+ " connected phis" ));
46+
// Width limit for PHI nodes: both visitPHINode() and
// discoverStronglyConnectedComponent() give up on a phi whose number of
// incoming values exceeds this option. The '-' lines carry the previous
// default (4); the '+' lines raise it to 8.
4247static cl::opt<unsigned > MaxIncomingPhiValues (
43- " funcspec-max-incoming-phi-values" , cl::init(4 ), cl::Hidden, cl::desc(
44- " The maximum number of incoming values a PHI node can have to be "
45- " considered during the specialization bonus estimation" ));
48+ " funcspec-max-incoming-phi-values" , cl::init(8 ), cl::Hidden,
49+ cl::desc( " The maximum number of incoming values a PHI node can have to be "
50+ " considered during the specialization bonus estimation" ));
4651
4752static cl::opt<unsigned > MaxBlockPredecessors (
4853 " funcspec-max-block-predecessors" , cl::init(2 ), cl::Hidden, cl::desc(
@@ -64,9 +69,9 @@ static cl::opt<unsigned> MinCodeSizeSavings(
6469 " much percent of the original function size" ));
6570
// Percentage threshold consumed by the IsProfitable check: a specialization is
// rejected when B.Latency < MinLatencySavings * FuncSize / 100. The '-' lines
// carry the previous default (70); the '+' lines lower it to 45.
6671static cl::opt<unsigned > MinLatencySavings (
67- " funcspec-min-latency-savings" , cl::init(70 ), cl::Hidden, cl::desc(
68- " Reject specializations whose latency savings are less than this"
69- " much percent of the original function size" ));
72+ " funcspec-min-latency-savings" , cl::init(45 ), cl::Hidden,
73+ cl::desc( " Reject specializations whose latency savings are less than this"
74+ " much percent of the original function size" ));
7075
7176static cl::opt<unsigned > MinInliningBonus (
7277 " funcspec-min-inlining-bonus" , cl::init(300 ), cl::Hidden, cl::desc(
@@ -262,30 +267,86 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
262267 return estimateBasicBlocks (WorkList);
263268}
264269
270+ void InstCostVisitor::discoverStronglyConnectedComponent (PHINode *PN,
271+ unsigned Depth) {
272+ if (Depth > MaxDiscoveryDepth)
273+ return ;
274+
275+ if (PN->getNumIncomingValues () > MaxIncomingPhiValues)
276+ return ;
277+
278+ if (!StronglyConnectedPHIs.insert (PN).second )
279+ return ;
280+
281+ for (unsigned I = 0 , E = PN->getNumIncomingValues (); I != E; ++I) {
282+ Value *V = PN->getIncomingValue (I);
283+ if (auto *Phi = dyn_cast<PHINode>(V)) {
284+ if (Phi == PN || DeadBlocks.contains (PN->getIncomingBlock (I)))
285+ continue ;
286+ discoverStronglyConnectedComponent (Phi, Depth + 1 );
287+ }
288+ }
289+ }
290+
265291Constant *InstCostVisitor::visitPHINode (PHINode &I) {
266292 if (I.getNumIncomingValues () > MaxIncomingPhiValues)
267293 return nullptr ;
268294
269295 bool Inserted = VisitedPHIs.insert (&I).second ;
270296 Constant *Const = nullptr ;
297+ SmallVector<PHINode *, 8 > UnknownIncomingValues;
271298
272- for (unsigned Idx = 0 , E = I.getNumIncomingValues (); Idx != E; ++Idx) {
273- Value *V = I.getIncomingValue (Idx);
274- if (auto *Inst = dyn_cast<Instruction>(V))
275- if (Inst == &I || DeadBlocks.contains (I.getIncomingBlock (Idx)))
276- continue ;
277- Constant *C = findConstantFor (V, KnownConstants);
278- if (!C) {
279- if (Inserted)
280- PendingPHIs.push_back (&I);
281- return nullptr ;
299+ auto CanConstantFoldPhi = [&](PHINode *PN) -> bool {
300+ UnknownIncomingValues.clear ();
301+
302+ for (unsigned I = 0 , E = PN->getNumIncomingValues (); I != E; ++I) {
303+ Value *V = PN->getIncomingValue (I);
304+
305+ // Disregard self-references and dead incoming values.
306+ if (auto *Inst = dyn_cast<Instruction>(V))
307+ if (Inst == PN || DeadBlocks.contains (PN->getIncomingBlock (I)))
308+ continue ;
309+
310+ if (Constant *C = findConstantFor (V, KnownConstants)) {
311+ if (!Const)
312+ Const = C;
313+ // Not all incoming values are the same constant. Bail immediately.
314+ else if (C != Const)
315+ return false ;
316+ } else if (auto *Phi = dyn_cast<PHINode>(V)) {
317+ // It's not a strongly connected phi. Collect it and bail at the end.
318+ if (!StronglyConnectedPHIs.contains (Phi))
319+ UnknownIncomingValues.push_back (Phi);
320+ } else {
321+ // We can't reason about anything else.
322+ return false ;
323+ }
324+ }
325+ return UnknownIncomingValues.empty ();
326+ };
327+
328+ if (CanConstantFoldPhi (&I))
329+ return Const;
330+
331+ if (Inserted) {
332+ // First time we are seeing this phi. We'll retry later, after all
333+ // the constant arguments have been propagated. Bail for now.
334+ PendingPHIs.push_back (&I);
335+ return nullptr ;
336+ }
337+
338+ for (PHINode *Phi : UnknownIncomingValues)
339+ discoverStronglyConnectedComponent (Phi, 1 );
340+
341+ bool CannotConstantFoldPhi = false ;
342+ for (PHINode *Phi : StronglyConnectedPHIs) {
343+ if (!CanConstantFoldPhi (Phi)) {
344+ CannotConstantFoldPhi = true ;
345+ break ;
282346 }
283- if (!Const)
284- Const = C;
285- else if (C != Const)
286- return nullptr ;
287347 }
288- return Const;
348+ StronglyConnectedPHIs.clear ();
349+ return CannotConstantFoldPhi ? nullptr : Const;
289350}
290351
291352Constant *InstCostVisitor::visitFreezeInst (FreezeInst &I) {
@@ -809,20 +870,40 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
809870 auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
810871 unsigned FuncGrowth) -> bool {
811872 // No check required.
812- if (ForceSpecialization)
873+ if (ForceSpecialization) {
874+ LLVM_DEBUG (dbgs () << " Force is on\n " );
813875 return true ;
876+ }
814877 // Minimum inlining bonus.
815- if (Score > MinInliningBonus * FuncSize / 100 )
878+ if (Score > MinInliningBonus * FuncSize / 100 ) {
879+ LLVM_DEBUG (dbgs ()
880+ << " FnSpecialization: Min inliningbous: Score = " << Score
881+ << " > " << MinInliningBonus * FuncSize / 100 << " \n " );
816882 return true ;
883+ }
817884 // Minimum codesize savings.
818- if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100 )
885+ if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100 ) {
886+ LLVM_DEBUG (dbgs ()
887+ << " FnSpecialization: Min CodeSize Saving: CodeSize = "
888+ << B.CodeSize << " > "
889+ << MinCodeSizeSavings * FuncSize / 100 << " \n " );
819890 return false ;
891+ }
820892 // Minimum latency savings.
821- if (B.Latency < MinLatencySavings * FuncSize / 100 )
893+ if (B.Latency < MinLatencySavings * FuncSize / 100 ) {
894+ LLVM_DEBUG (dbgs ()
895+ << " FnSpecialization: Min Latency Saving: Latency = "
896+ << B.Latency << " > " << MinLatencySavings * FuncSize / 100
897+ << " \n " );
822898 return false ;
899+ }
823900 // Maximum codesize growth.
824- if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
901+ if (FuncGrowth / FuncSize > MaxCodeSizeGrowth) {
902+ LLVM_DEBUG (dbgs () << " FnSpecialization: Max Func Growth: CodeSize = "
903+ << FuncGrowth / FuncSize << " > "
904+ << MaxCodeSizeGrowth << " \n " );
825905 return false ;
906+ }
826907 return true ;
827908 };
828909
0 commit comments