Skip to content

Commit b9ccd09

Browse files
committed
[SYCL] Rewrite aspect propagation function lookup using toposort
Signed-off-by: Dmitry Sidorov <dmitrii.s.sidorov@gmail.com>
1 parent 3ccc8ec commit b9ccd09

File tree

1 file changed

+147
-13
lines changed

1 file changed

+147
-13
lines changed

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 147 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,53 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511
}
512512
}
513513

514+
/// Computes topological order of functions in the call graph.
515+
/// Returns functions in reverse topological order.
516+
/// This allows single-pass bottom-up propagation.
517+
std::vector<Function *> getTopologicalOrder(const CallGraphTy &CG,
518+
const std::vector<Function *> &EntryPoints) {
519+
std::vector<Function *> Result;
520+
DenseMap<const Function *, unsigned> InDegree;
521+
522+
// Build reverse call graph and compute in-degrees.
523+
DenseMap<Function *, SmallVector<Function *, 4>> ReverseCG;
524+
SmallPtrSet<Function *, 32> AllFunctions;
525+
for (const auto &[Caller, Callees] : CG) {
526+
AllFunctions.insert(Caller);
527+
for (Function *Callee : Callees) {
528+
AllFunctions.insert(Callee);
529+
ReverseCG[Callee].push_back(Caller);
530+
InDegree[Caller]++;
531+
}
532+
}
533+
534+
// Start with leaf functions.
535+
std::queue<Function *> Worklist;
536+
for (Function *F : AllFunctions) {
537+
if (InDegree[F] == 0)
538+
Worklist.push(F);
539+
}
540+
541+
// Kahn's algorithm for topological sort.
542+
while (!Worklist.empty()) {
543+
Function *F = Worklist.front();
544+
Worklist.pop();
545+
Result.push_back(F);
546+
547+
auto It = ReverseCG.find(F);
548+
if (It != ReverseCG.end()) {
549+
for (Function *Caller : It->second) {
550+
if (--InDegree[Caller] == 0)
551+
Worklist.push(Caller);
552+
}
553+
}
554+
}
555+
556+
return Result;
557+
}
558+
514559
/// Propagates aspects from leaves up to the top of call graph.
560+
/// Uses topological sort for efficient single-pass propagation.
515561
/// NB! Call graph corresponds to call graph of SYCL code which
516562
/// can't contain recursive calls. So there can't be loops in
517563
/// a call graph. But paths through the call graph can intersect.
@@ -534,6 +580,91 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534580
AspectsMap[F].insert(LocalAspects.begin(), LocalAspects.end());
535581
}
536582

583+
/// Processes each function exactly once in bottom-up order.
584+
void propagateAspectsThroughCGOptimized(const std::vector<Function *> &TopoOrder,
585+
const CallGraphTy &CG,
586+
FunctionToAspectsMapTy &AspectsMap) {
587+
// Process in topological order.
588+
for (Function *F : TopoOrder) {
589+
auto It = CG.find(F);
590+
if (It == CG.end())
591+
continue;
592+
593+
// Merge aspects from all callees.
594+
for (Function *Callee : It->second) {
595+
const auto &CalleeAspects = AspectsMap[Callee];
596+
AspectsMap[F].insert(CalleeAspects.begin(), CalleeAspects.end());
597+
}
598+
}
599+
}
600+
601+
/// This reduces redundant type lookups when multiple instructions use the same type.
602+
AspectsSetTy getAspectsFromInstructions(Function &F,
603+
TypeToAspectsMapTy &TypesWithAspects,
604+
int FP64Aspect, bool FP64ConvEmu) {
605+
AspectsSetTy Result;
606+
607+
// Collect unique types used across all instructions.
608+
SmallDenseMap<const Type *, bool, 32> SeenTypes;
609+
610+
for (Instruction &I : instructions(F)) {
611+
// Check instruction return type.
612+
const Type *ReturnType = I.getType();
613+
if (auto *AI = dyn_cast<AllocaInst>(&I))
614+
ReturnType = AI->getAllocatedType();
615+
616+
bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction(I);
617+
618+
// Only analyze this type once.
619+
if (SeenTypes.try_emplace(ReturnType, IsFP64Conversion && hasDoubleType(ReturnType)).second) {
620+
const AspectsSetTy &Aspects = getAspectsFromType(ReturnType, TypesWithAspects);
621+
for (int Aspect : Aspects) {
622+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
623+
Result.insert(Aspect);
624+
}
625+
}
626+
627+
// Check operand types.
628+
for (const auto &OperandIt : I.operands()) {
629+
const Type *OpType = nullptr;
630+
if (const auto *GV = dyn_cast<GlobalValue>(OperandIt->stripPointerCasts()))
631+
OpType = GV->getValueType();
632+
else
633+
OpType = OperandIt->getType();
634+
635+
if (OpType && SeenTypes.try_emplace(OpType, IsFP64Conversion && hasDoubleType(OpType)).second) {
636+
const AspectsSetTy &Aspects = getAspectsFromType(OpType, TypesWithAspects);
637+
for (int Aspect : Aspects) {
638+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
639+
Result.insert(Aspect);
640+
}
641+
}
642+
}
643+
644+
// Check GEP source type.
645+
if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
646+
const Type *SourceType = GEPI->getSourceElementType();
647+
if (SeenTypes.try_emplace(SourceType, IsFP64Conversion && hasDoubleType(SourceType)).second) {
648+
const AspectsSetTy &Aspects = getAspectsFromType(SourceType, TypesWithAspects);
649+
for (int Aspect : Aspects) {
650+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
651+
Result.insert(Aspect);
652+
}
653+
}
654+
}
655+
656+
// Check instruction-level metadata.
657+
if (const MDNode *InstAspects = I.getMetadata("sycl_used_aspects")) {
658+
for (const MDOperand &MDOp : InstAspects->operands()) {
659+
const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue();
660+
Result.insert(cast<ConstantInt>(C)->getSExtValue());
661+
}
662+
}
663+
}
664+
665+
return Result;
666+
}
667+
537668
/// Processes a function:
538669
/// - checks if return and argument types are using any aspects
539670
/// - checks if instructions are using any aspects
@@ -564,19 +695,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564695
FunctionToUsedAspects[&F].insert(Aspect);
565696
}
566697

698+
// Optimized instruction analysis with type deduplication.
699+
const AspectsSetTy InstrAspects =
700+
getAspectsFromInstructions(F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
701+
FunctionToUsedAspects[&F].insert(InstrAspects.begin(), InstrAspects.end());
702+
703+
// Build call graph.
567704
for (Instruction &I : instructions(F)) {
568-
const AspectsSetTy Aspects =
569-
getAspectsUsedByInstruction(I, TypesWithAspects);
570-
for (const auto &Aspect : Aspects)
571-
if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType(I) ||
572-
!isFP64ConversionInstruction(I))
573-
FunctionToUsedAspects[&F].insert(Aspect);
574705
if (const auto *CI = dyn_cast<CallInst>(&I)) {
575706
if (!CI->isIndirectCall() && CI->getCalledFunction())
576707
CG[&F].insert(CI->getCalledFunction());
577708
}
578709
}
579710

711+
// Collect aspects from metadata (combined to reduce lookups).
580712
auto CollectAspectsFromMD = [&F](const char* MDName, FunctionToAspectsMapTy &Map) {
581713
if (const MDNode *MD = F.getMetadata(MDName)) {
582714
AspectsSetTy Aspects;
@@ -696,23 +828,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696828
collectVirtualFunctionSetInfo(F, VirtualFunctionSets);
697829
}
698830

831+
// Compute topological order once for both propagation passes.
832+
std::vector<Function *> TopoOrder = getTopologicalOrder(CG, EntryPoints);
833+
834+
// Handle virtual function sets (still needs old recursive propagation)
699835
SmallPtrSet<const Function *, 16> Visited;
700836
for (Function *F : EntryPoints) {
701-
propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited);
702837
processDeclaredVirtualFunctionSets(F, CG, FunctionToUsedAspects, Visited,
703838
VirtualFunctionSets);
704839
}
705840

841+
// Optimized single-pass propagation for used aspects.
842+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToUsedAspects);
843+
706844
if (ValidateAspects)
707845
validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues,
708846
EntryPoints, CG);
709847

710-
// The set of aspects from FunctionToDeclaredAspects should be merged to the
711-
// set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-
// avoid errors during validation.
713-
Visited.clear();
714-
for (Function *F : EntryPoints)
715-
propagateAspectsThroughCG(F, CG, FunctionToDeclaredAspects, Visited);
848+
// Optimized single-pass propagation for declared aspects.
849+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToDeclaredAspects);
716850

717851
return {std::move(FunctionToUsedAspects),
718852
std::move(FunctionToDeclaredAspects)};

0 commit comments

Comments
 (0)