@@ -511,7 +511,53 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511 }
512512}
513513
514+ // / Computes topological order of functions in the call graph.
515+ // / Returns functions in reverse topological order.
516+ // / This allows single-pass bottom-up propagation.
517+ std::vector<Function *> getTopologicalOrder (const CallGraphTy &CG,
518+ const std::vector<Function *> &EntryPoints) {
519+ std::vector<Function *> Result;
520+ DenseMap<const Function *, unsigned > InDegree;
521+
522+ // Build reverse call graph and compute in-degrees.
523+ DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
524+ SmallPtrSet<Function *, 32 > AllFunctions;
525+ for (const auto &[Caller, Callees] : CG) {
526+ AllFunctions.insert (Caller);
527+ for (Function *Callee : Callees) {
528+ AllFunctions.insert (Callee);
529+ ReverseCG[Callee].push_back (Caller);
530+ InDegree[Caller]++;
531+ }
532+ }
533+
534+ // Start with leaf functions.
535+ std::queue<Function *> Worklist;
536+ for (Function *F : AllFunctions) {
537+ if (InDegree[F] == 0 )
538+ Worklist.push (F);
539+ }
540+
541+ // Kahn's algorithm for topological sort.
542+ while (!Worklist.empty ()) {
543+ Function *F = Worklist.front ();
544+ Worklist.pop ();
545+ Result.push_back (F);
546+
547+ auto It = ReverseCG.find (F);
548+ if (It != ReverseCG.end ()) {
549+ for (Function *Caller : It->second ) {
550+ if (--InDegree[Caller] == 0 )
551+ Worklist.push (Caller);
552+ }
553+ }
554+ }
555+
556+ return Result;
557+ }
558+
514559// / Propagates aspects from leaves up to the top of call graph.
560+ // / Uses topological sort for efficient single-pass propagation.
515561// / NB! Call graph corresponds to call graph of SYCL code which
516562// / can't contain recursive calls. So there can't be loops in
517563// / a call graph. But paths through the call graph can intersect.
@@ -534,6 +580,91 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534580 AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535581}
536582
583+ // / Processes each function exactly once in bottom-up order.
584+ void propagateAspectsThroughCGOptimized (const std::vector<Function *> &TopoOrder,
585+ const CallGraphTy &CG,
586+ FunctionToAspectsMapTy &AspectsMap) {
587+ // Process in topological order.
588+ for (Function *F : TopoOrder) {
589+ auto It = CG.find (F);
590+ if (It == CG.end ())
591+ continue ;
592+
593+ // Merge aspects from all callees.
594+ for (Function *Callee : It->second ) {
595+ const auto &CalleeAspects = AspectsMap[Callee];
596+ AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
597+ }
598+ }
599+ }
600+
601+ // / This reduces redundant type lookups when multiple instructions use the same type.
602+ AspectsSetTy getAspectsFromInstructions (Function &F,
603+ TypeToAspectsMapTy &TypesWithAspects,
604+ int FP64Aspect, bool FP64ConvEmu) {
605+ AspectsSetTy Result;
606+
607+ // Collect unique types used across all instructions.
608+ SmallDenseMap<const Type *, bool , 32 > SeenTypes;
609+
610+ for (Instruction &I : instructions (F)) {
611+ // Check instruction return type.
612+ const Type *ReturnType = I.getType ();
613+ if (auto *AI = dyn_cast<AllocaInst>(&I))
614+ ReturnType = AI->getAllocatedType ();
615+
616+ bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
617+
618+ // Only analyze this type once.
619+ if (SeenTypes.try_emplace (ReturnType, IsFP64Conversion && hasDoubleType (ReturnType)).second ) {
620+ const AspectsSetTy &Aspects = getAspectsFromType (ReturnType, TypesWithAspects);
621+ for (int Aspect : Aspects) {
622+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
623+ Result.insert (Aspect);
624+ }
625+ }
626+
627+ // Check operand types.
628+ for (const auto &OperandIt : I.operands ()) {
629+ const Type *OpType = nullptr ;
630+ if (const auto *GV = dyn_cast<GlobalValue>(OperandIt->stripPointerCasts ()))
631+ OpType = GV->getValueType ();
632+ else
633+ OpType = OperandIt->getType ();
634+
635+ if (OpType && SeenTypes.try_emplace (OpType, IsFP64Conversion && hasDoubleType (OpType)).second ) {
636+ const AspectsSetTy &Aspects = getAspectsFromType (OpType, TypesWithAspects);
637+ for (int Aspect : Aspects) {
638+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
639+ Result.insert (Aspect);
640+ }
641+ }
642+ }
643+
644+ // Check GEP source type.
645+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
646+ const Type *SourceType = GEPI->getSourceElementType ();
647+ if (SeenTypes.try_emplace (SourceType, IsFP64Conversion && hasDoubleType (SourceType)).second ) {
648+ const AspectsSetTy &Aspects = getAspectsFromType (SourceType, TypesWithAspects);
649+ for (int Aspect : Aspects) {
650+ if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
651+ Result.insert (Aspect);
652+ }
653+ }
654+ }
655+
656+ // Check instruction-level metadata.
657+ if (const MDNode *InstAspects = I.getMetadata (" sycl_used_aspects" )) {
658+ for (const MDOperand &MDOp : InstAspects->operands ()) {
659+ const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue ();
660+ Result.insert (cast<ConstantInt>(C)->getSExtValue ());
661+ }
662+ }
663+ }
664+
665+ return Result;
666+ }
667+
537668// / Processes a function:
538669// / - checks if return and argument types are using any aspects
539670// / - checks if instructions are using any aspects
@@ -564,19 +695,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564695 FunctionToUsedAspects[&F].insert (Aspect);
565696 }
566697
698+ // Optimized instruction analysis with type deduplication.
699+ const AspectsSetTy InstrAspects =
700+ getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
701+ FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
702+
703+ // Build call graph.
567704 for (Instruction &I : instructions (F)) {
568- const AspectsSetTy Aspects =
569- getAspectsUsedByInstruction (I, TypesWithAspects);
570- for (const auto &Aspect : Aspects)
571- if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572- !isFP64ConversionInstruction (I))
573- FunctionToUsedAspects[&F].insert (Aspect);
574705 if (const auto *CI = dyn_cast<CallInst>(&I)) {
575706 if (!CI->isIndirectCall () && CI->getCalledFunction ())
576707 CG[&F].insert (CI->getCalledFunction ());
577708 }
578709 }
579710
711+ // Collect aspects from metadata (combined to reduce lookups).
580712 auto CollectAspectsFromMD = [&F](const char * MDName, FunctionToAspectsMapTy &Map) {
581713 if (const MDNode *MD = F.getMetadata (MDName)) {
582714 AspectsSetTy Aspects;
@@ -696,23 +828,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696828 collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697829 }
698830
831+ // Compute topological order once for both propagation passes.
832+ std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
833+
834+ // Handle virtual function sets (still needs old recursive propagation)
699835 SmallPtrSet<const Function *, 16 > Visited;
700836 for (Function *F : EntryPoints) {
701- propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702837 processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703838 VirtualFunctionSets);
704839 }
705840
841+ // Optimized single-pass propagation for used aspects.
842+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
843+
706844 if (ValidateAspects)
707845 validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708846 EntryPoints, CG);
709847
710- // The set of aspects from FunctionToDeclaredAspects should be merged to the
711- // set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712- // avoid errors during validation.
713- Visited.clear ();
714- for (Function *F : EntryPoints)
715- propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
848+ // Optimized single-pass propagation for declared aspects.
849+ propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716850
717851 return {std::move (FunctionToUsedAspects),
718852 std::move (FunctionToDeclaredAspects)};
0 commit comments