@@ -511,7 +511,54 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511 }
512512}
513513
514+ // / Computes topological order of functions in the call graph.
515+ // / Returns functions in reverse topological order.
516+ // / This allows single-pass bottom-up propagation.
517+ std::vector<Function *>
518+ getTopologicalOrder (const CallGraphTy &CG,
519+ const std::vector<Function *> &EntryPoints) {
520+ std::vector<Function *> Result;
521+ DenseMap<const Function *, unsigned > InDegree;
522+
523+ // Build reverse call graph and compute in-degrees.
524+ DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
525+ SmallPtrSet<Function *, 32 > AllFunctions;
526+ for (const auto &[Caller, Callees] : CG) {
527+ AllFunctions.insert (Caller);
528+ for (Function *Callee : Callees) {
529+ AllFunctions.insert (Callee);
530+ ReverseCG[Callee].push_back (Caller);
531+ InDegree[Caller]++;
532+ }
533+ }
534+
535+ // Start with leaf functions.
536+ std::queue<Function *> Worklist;
537+ for (Function *F : AllFunctions) {
538+ if (InDegree[F] == 0 )
539+ Worklist.push (F);
540+ }
541+
542+ // Kahn's algorithm for topological sort.
543+ while (!Worklist.empty ()) {
544+ Function *F = Worklist.front ();
545+ Worklist.pop ();
546+ Result.push_back (F);
547+
548+ auto It = ReverseCG.find (F);
549+ if (It != ReverseCG.end ()) {
550+ for (Function *Caller : It->second ) {
551+ if (--InDegree[Caller] == 0 )
552+ Worklist.push (Caller);
553+ }
554+ }
555+ }
556+
557+ return Result;
558+ }
559+
514560// / Propagates aspects from leaves up to the top of call graph.
561+ // / Uses topological sort for efficient single-pass propagation.
515562// / NB! Call graph corresponds to call graph of SYCL code which
516563// / can't contain recursive calls. So there can't be loops in
517564// / a call graph. But paths through it can intersect.
@@ -534,6 +581,105 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534581 AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535582}
536583
584+ // / Processes each function exactly once in bottom-up order.
585+ void propagateAspectsThroughCGOptimized (
586+ const std::vector<Function *> &TopoOrder, const CallGraphTy &CG,
587+ FunctionToAspectsMapTy &AspectsMap) {
588+ // Process in topological order.
589+ for (Function *F : TopoOrder) {
590+ auto It = CG.find (F);
591+ if (It == CG.end ())
592+ continue ;
593+
594+ // Merge aspects from all callees.
595+ for (Function *Callee : It->second ) {
596+ const auto &CalleeAspects = AspectsMap[Callee];
597+ AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
598+ }
599+ }
600+ }
601+
602+ // / This reduces redundant type lookups when multiple instructions use the same
603+ // / type.
604+ AspectsSetTy getAspectsFromInstructions (Function &F,
605+ TypeToAspectsMapTy &TypesWithAspects,
606+ int FP64Aspect, bool FP64ConvEmu) {
607+ AspectsSetTy Result;
608+
609+ // Collect unique types used across all instructions.
610+ SmallDenseMap<const Type *, bool , 32 > SeenTypes;
611+
612+ for (Instruction &I : instructions (F)) {
613+ // Check instruction return type.
614+ const Type *ReturnType = I.getType ();
615+ if (auto *AI = dyn_cast<AllocaInst>(&I))
616+ ReturnType = AI->getAllocatedType ();
617+
618+ bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
619+
620+ // Only analyze this type once.
621+ if (SeenTypes
622+ .try_emplace (ReturnType,
623+ IsFP64Conversion && hasDoubleType (ReturnType))
624+ .second ) {
625+ const AspectsSetTy &Aspects =
626+ getAspectsFromType (ReturnType, TypesWithAspects);
627+ for (int Aspect : Aspects) {
628+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
629+ Result.insert (Aspect);
630+ }
631+ }
632+
633+ // Check operand types.
634+ for (const auto &OperandIt : I.operands ()) {
635+ const Type *OpType = nullptr ;
636+ if (const auto *GV =
637+ dyn_cast<GlobalValue>(OperandIt->stripPointerCasts ()))
638+ OpType = GV->getValueType ();
639+ else
640+ OpType = OperandIt->getType ();
641+
642+ if (OpType &&
643+ SeenTypes
644+ .try_emplace (OpType, IsFP64Conversion && hasDoubleType (OpType))
645+ .second ) {
646+ const AspectsSetTy &Aspects =
647+ getAspectsFromType (OpType, TypesWithAspects);
648+ for (int Aspect : Aspects) {
649+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
650+ Result.insert (Aspect);
651+ }
652+ }
653+ }
654+
655+ // Check GEP source type.
656+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
657+ const Type *SourceType = GEPI->getSourceElementType ();
658+ if (SeenTypes
659+ .try_emplace (SourceType,
660+ IsFP64Conversion && hasDoubleType (SourceType))
661+ .second ) {
662+ const AspectsSetTy &Aspects =
663+ getAspectsFromType (SourceType, TypesWithAspects);
664+ for (int Aspect : Aspects) {
665+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
666+ Result.insert (Aspect);
667+ }
668+ }
669+ }
670+
671+ // Check instruction-level metadata.
672+ if (const MDNode *InstAspects = I.getMetadata (" sycl_used_aspects" )) {
673+ for (const MDOperand &MDOp : InstAspects->operands ()) {
674+ const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue ();
675+ Result.insert (cast<ConstantInt>(C)->getSExtValue ());
676+ }
677+ }
678+ }
679+
680+ return Result;
681+ }
682+
537683// / Processes a function:
538684// / - checks if return and argument types are using any aspects
539685// / - checks if instructions are using any aspects
@@ -564,19 +710,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564710 FunctionToUsedAspects[&F].insert (Aspect);
565711 }
566712
713+ // Optimized instruction analysis with type deduplication.
714+ const AspectsSetTy InstrAspects =
715+ getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
716+ FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
717+
718+ // Build call graph.
567719 for (Instruction &I : instructions (F)) {
568- const AspectsSetTy Aspects =
569- getAspectsUsedByInstruction (I, TypesWithAspects);
570- for (const auto &Aspect : Aspects)
571- if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572- !isFP64ConversionInstruction (I))
573- FunctionToUsedAspects[&F].insert (Aspect);
574720 if (const auto *CI = dyn_cast<CallInst>(&I)) {
575721 if (!CI->isIndirectCall () && CI->getCalledFunction ())
576722 CG[&F].insert (CI->getCalledFunction ());
577723 }
578724 }
579725
726+ // Collect aspects from metadata (combined to reduce lookups).
580727 auto CollectAspectsFromMD = [&F](const char * MDName, FunctionToAspectsMapTy &Map) {
581728 if (const MDNode *MD = F.getMetadata (MDName)) {
582729 AspectsSetTy Aspects;
@@ -696,23 +843,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696843 collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697844 }
698845
846+ // Compute topological order once for both propagation passes.
847+ std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
848+
849+ // Handle virtual function sets (still needs old recursive propagation)
699850 SmallPtrSet<const Function *, 16 > Visited;
700851 for (Function *F : EntryPoints) {
701- propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702852 processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703853 VirtualFunctionSets);
704854 }
705855
856+ // Optimized single-pass propagation for used aspects.
857+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
858+
706859 if (ValidateAspects)
707860 validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708861 EntryPoints, CG);
709862
710- // The set of aspects from FunctionToDeclaredAspects should be merged to the
711- // set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712- // avoid errors during validation.
713- Visited.clear ();
714- for (Function *F : EntryPoints)
715- propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
863+ // Optimized single-pass propagation for declared aspects.
864+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716865
717866 return {std::move (FunctionToUsedAspects),
718867 std::move (FunctionToDeclaredAspects)};
0 commit comments