@@ -511,7 +511,50 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511 }
512512}
513513
514+ // / Computes topological order of functions in the call graph.
515+ // / Returns functions in reverse topological order.
516+ // / This allows single-pass bottom-up propagation.
517+ std::vector<Function *> getTopologicalOrder (const CallGraphTy &CG,
518+ const std::vector<Function *> &EntryPoints) {
519+ std::vector<Function *> Result;
520+ DenseMap<const Function *, unsigned > InDegree;
521+
522+ // Build reverse call graph and compute in-degrees.
523+ DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
524+ for (const auto &[Caller, Callees] : CG) {
525+ for (Function *Callee : Callees) {
526+ ReverseCG[Callee].push_back (Caller);
527+ InDegree[Caller]++;
528+ }
529+ }
530+
531+ // Start with functions that have no callees.
532+ std::queue<Function *> Worklist;
533+ for (const auto &[F, Callees] : CG) {
534+ if (InDegree[F] == 0 )
535+ Worklist.push (F);
536+ }
537+
538+ // Kahn's algorithm for topological sort.
539+ while (!Worklist.empty ()) {
540+ Function *F = Worklist.front ();
541+ Worklist.pop ();
542+ Result.push_back (F);
543+
544+ auto It = ReverseCG.find (F);
545+ if (It != ReverseCG.end ()) {
546+ for (Function *Caller : It->second ) {
547+ if (--InDegree[Caller] == 0 )
548+ Worklist.push (Caller);
549+ }
550+ }
551+ }
552+
553+ return Result;
554+ }
555+
/// Propagates aspects from leaves up to the top of call graph.
/// This is the recursive variant; the single-pass variant that walks a
/// precomputed topological order is propagateAspectsThroughCGOptimized.
/// NB! Call graph corresponds to call graph of SYCL code which
/// can't contain recursive calls. So there can't be loops in
/// a call graph. But there can be intersecting paths.
@@ -534,6 +577,91 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534577 AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535578}
536579
580+ // / Processes each function exactly once in bottom-up order.
581+ void propagateAspectsThroughCGOptimized (const std::vector<Function *> &TopoOrder,
582+ const CallGraphTy &CG,
583+ FunctionToAspectsMapTy &AspectsMap) {
584+ // Process in topological order.
585+ for (Function *F : TopoOrder) {
586+ auto It = CG.find (F);
587+ if (It == CG.end ())
588+ continue ;
589+
590+ // Merge aspects from all callees.
591+ for (Function *Callee : It->second ) {
592+ const auto &CalleeAspects = AspectsMap[Callee];
593+ AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
594+ }
595+ }
596+ }
597+
598+ // / This reduces redundant type lookups when multiple instructions use the same type.
599+ AspectsSetTy getAspectsFromInstructions (Function &F,
600+ TypeToAspectsMapTy &TypesWithAspects,
601+ int FP64Aspect, bool FP64ConvEmu) {
602+ AspectsSetTy Result;
603+
604+ // Collect unique types used across all instructions.
605+ SmallDenseMap<const Type *, bool , 32 > SeenTypes;
606+
607+ for (Instruction &I : instructions (F)) {
608+ // Check instruction return type.
609+ const Type *ReturnType = I.getType ();
610+ if (auto *AI = dyn_cast<AllocaInst>(&I))
611+ ReturnType = AI->getAllocatedType ();
612+
613+ bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
614+
615+ // Only analyze this type once.
616+ if (SeenTypes.try_emplace (ReturnType, IsFP64Conversion && hasDoubleType (ReturnType)).second ) {
617+ const AspectsSetTy &Aspects = getAspectsFromType (ReturnType, TypesWithAspects);
618+ for (int Aspect : Aspects) {
619+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
620+ Result.insert (Aspect);
621+ }
622+ }
623+
624+ // Check operand types.
625+ for (const auto &OperandIt : I.operands ()) {
626+ const Type *OpType = nullptr ;
627+ if (const auto *GV = dyn_cast<GlobalValue>(OperandIt->stripPointerCasts ()))
628+ OpType = GV->getValueType ();
629+ else
630+ OpType = OperandIt->getType ();
631+
632+ if (OpType && SeenTypes.try_emplace (OpType, IsFP64Conversion && hasDoubleType (OpType)).second ) {
633+ const AspectsSetTy &Aspects = getAspectsFromType (OpType, TypesWithAspects);
634+ for (int Aspect : Aspects) {
635+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
636+ Result.insert (Aspect);
637+ }
638+ }
639+ }
640+
641+ // Check GEP source type.
642+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
643+ const Type *SourceType = GEPI->getSourceElementType ();
644+ if (SeenTypes.try_emplace (SourceType, IsFP64Conversion && hasDoubleType (SourceType)).second ) {
645+ const AspectsSetTy &Aspects = getAspectsFromType (SourceType, TypesWithAspects);
646+ for (int Aspect : Aspects) {
647+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
648+ Result.insert (Aspect);
649+ }
650+ }
651+ }
652+
653+ // Check instruction-level metadata.
654+ if (const MDNode *InstAspects = I.getMetadata (" sycl_used_aspects" )) {
655+ for (const MDOperand &MDOp : InstAspects->operands ()) {
656+ const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue ();
657+ Result.insert (cast<ConstantInt>(C)->getSExtValue ());
658+ }
659+ }
660+ }
661+
662+ return Result;
663+ }
664+
537665// / Processes a function:
538666// / - checks if return and argument types are using any aspects
539667// / - checks if instructions are using any aspects
@@ -549,12 +677,14 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
549677 assert (FP64AspectIt != AspectValues.end () &&
550678 " fp64 aspect was not found in the aspect values." );
551679 auto FP64Aspect = FP64AspectIt->second ;
680+
552681 const AspectsSetTy RetTyAspects =
553682 getAspectsFromType (F.getReturnType (), TypesWithAspects);
554683 for (const auto &Aspect : RetTyAspects)
555684 if (!FP64ConvEmu || (Aspect != FP64Aspect) ||
556685 !hasDoubleType (F.getReturnType ()))
557686 FunctionToUsedAspects[&F].insert (Aspect);
687+
558688 for (Argument &Arg : F.args ()) {
559689 const AspectsSetTy ArgAspects =
560690 getAspectsFromType (Arg.getType (), TypesWithAspects);
@@ -564,19 +694,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564694 FunctionToUsedAspects[&F].insert (Aspect);
565695 }
566696
697+ // Optimized instruction analysis with type deduplication.
698+ const AspectsSetTy InstrAspects =
699+ getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
700+ FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
701+
702+ // Build call graph.
567703 for (Instruction &I : instructions (F)) {
568- const AspectsSetTy Aspects =
569- getAspectsUsedByInstruction (I, TypesWithAspects);
570- for (const auto &Aspect : Aspects)
571- if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572- !isFP64ConversionInstruction (I))
573- FunctionToUsedAspects[&F].insert (Aspect);
574704 if (const auto *CI = dyn_cast<CallInst>(&I)) {
575705 if (!CI->isIndirectCall () && CI->getCalledFunction ())
576706 CG[&F].insert (CI->getCalledFunction ());
577707 }
578708 }
579709
710+ // Collect aspects from metadata (combined to reduce lookups).
580711 auto CollectAspectsFromMD = [&F](const char * MDName, FunctionToAspectsMapTy &Map) {
581712 if (const MDNode *MD = F.getMetadata (MDName)) {
582713 AspectsSetTy Aspects;
@@ -696,23 +827,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696827 collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697828 }
698829
830+ // Compute topological order once for both propagation passes.
831+ std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
832+
833+ // Handle virtual function sets (still needs old recursive propagation)
699834 SmallPtrSet<const Function *, 16 > Visited;
700835 for (Function *F : EntryPoints) {
701- propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702836 processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703837 VirtualFunctionSets);
704838 }
705839
840+ // Optimized single-pass propagation for used aspects.
841+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
842+
706843 if (ValidateAspects)
707844 validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708845 EntryPoints, CG);
709846
710- // The set of aspects from FunctionToDeclaredAspects should be merged to the
711- // set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712- // avoid errors during validation.
713- Visited.clear ();
714- for (Function *F : EntryPoints)
715- propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
847+ // Optimized single-pass propagation for declared aspects.
848+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716849
717850 return {std::move (FunctionToUsedAspects),
718851 std::move (FunctionToDeclaredAspects)};
0 commit comments