Skip to content

Commit 17d9e66

Browse files
committed
[SYCL] Rewrite aspect propagation function lookup using toposort
Signed-off-by: Dmitry Sidorov <dmitrii.s.sidorov@gmail.com>
1 parent 3ccc8ec commit 17d9e66

File tree

1 file changed

+162
-13
lines changed

1 file changed

+162
-13
lines changed

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 162 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,54 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511511
}
512512
}
513513

514+
/// Computes topological order of functions in the call graph.
515+
/// Returns functions in reverse topological order.
516+
/// This allows single-pass bottom-up propagation.
517+
std::vector<Function *>
518+
getTopologicalOrder(const CallGraphTy &CG,
519+
const std::vector<Function *> &EntryPoints) {
520+
std::vector<Function *> Result;
521+
DenseMap<const Function *, unsigned> InDegree;
522+
523+
// Build reverse call graph and compute in-degrees.
524+
DenseMap<Function *, SmallVector<Function *, 4>> ReverseCG;
525+
SmallPtrSet<Function *, 32> AllFunctions;
526+
for (const auto &[Caller, Callees] : CG) {
527+
AllFunctions.insert(Caller);
528+
for (Function *Callee : Callees) {
529+
AllFunctions.insert(Callee);
530+
ReverseCG[Callee].push_back(Caller);
531+
InDegree[Caller]++;
532+
}
533+
}
534+
535+
// Start with leaf functions.
536+
std::queue<Function *> Worklist;
537+
for (Function *F : AllFunctions) {
538+
if (InDegree[F] == 0)
539+
Worklist.push(F);
540+
}
541+
542+
// Kahn's algorithm for topological sort.
543+
while (!Worklist.empty()) {
544+
Function *F = Worklist.front();
545+
Worklist.pop();
546+
Result.push_back(F);
547+
548+
auto It = ReverseCG.find(F);
549+
if (It != ReverseCG.end()) {
550+
for (Function *Caller : It->second) {
551+
if (--InDegree[Caller] == 0)
552+
Worklist.push(Caller);
553+
}
554+
}
555+
}
556+
557+
return Result;
558+
}
559+
514560
/// Propagates aspects from leaves up to the top of call graph.
561+
/// This is the recursive variant; propagateAspectsThroughCGOptimized is the
/// single-pass variant that walks a precomputed topological order.
515562
/// NB! Call graph corresponds to call graph of SYCL code which
516563
/// can't contain recursive calls. So there can't be loops in
517564
/// a call graph. But paths through the graph can intersect.
@@ -534,6 +581,105 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534581
AspectsMap[F].insert(LocalAspects.begin(), LocalAspects.end());
535582
}
536583

584+
/// Processes each function exactly once in bottom-up order.
585+
void propagateAspectsThroughCGOptimized(
586+
const std::vector<Function *> &TopoOrder, const CallGraphTy &CG,
587+
FunctionToAspectsMapTy &AspectsMap) {
588+
// Process in topological order.
589+
for (Function *F : TopoOrder) {
590+
auto It = CG.find(F);
591+
if (It == CG.end())
592+
continue;
593+
594+
// Merge aspects from all callees.
595+
for (Function *Callee : It->second) {
596+
const auto &CalleeAspects = AspectsMap[Callee];
597+
AspectsMap[F].insert(CalleeAspects.begin(), CalleeAspects.end());
598+
}
599+
}
600+
}
601+
602+
/// This reduces redundant type lookups when multiple instructions use the same
603+
/// type.
604+
AspectsSetTy getAspectsFromInstructions(Function &F,
605+
TypeToAspectsMapTy &TypesWithAspects,
606+
int FP64Aspect, bool FP64ConvEmu) {
607+
AspectsSetTy Result;
608+
609+
// Collect unique types used across all instructions.
610+
SmallDenseMap<const Type *, bool, 32> SeenTypes;
611+
612+
for (Instruction &I : instructions(F)) {
613+
// Check instruction return type.
614+
const Type *ReturnType = I.getType();
615+
if (auto *AI = dyn_cast<AllocaInst>(&I))
616+
ReturnType = AI->getAllocatedType();
617+
618+
bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction(I);
619+
620+
// Only analyze this type once.
621+
if (SeenTypes
622+
.try_emplace(ReturnType,
623+
IsFP64Conversion && hasDoubleType(ReturnType))
624+
.second) {
625+
const AspectsSetTy &Aspects =
626+
getAspectsFromType(ReturnType, TypesWithAspects);
627+
for (int Aspect : Aspects) {
628+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[ReturnType])
629+
Result.insert(Aspect);
630+
}
631+
}
632+
633+
// Check operand types.
634+
for (const auto &OperandIt : I.operands()) {
635+
const Type *OpType = nullptr;
636+
if (const auto *GV =
637+
dyn_cast<GlobalValue>(OperandIt->stripPointerCasts()))
638+
OpType = GV->getValueType();
639+
else
640+
OpType = OperandIt->getType();
641+
642+
if (OpType &&
643+
SeenTypes
644+
.try_emplace(OpType, IsFP64Conversion && hasDoubleType(OpType))
645+
.second) {
646+
const AspectsSetTy &Aspects =
647+
getAspectsFromType(OpType, TypesWithAspects);
648+
for (int Aspect : Aspects) {
649+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[OpType])
650+
Result.insert(Aspect);
651+
}
652+
}
653+
}
654+
655+
// Check GEP source type.
656+
if (auto *GEPI = dyn_cast<GetElementPtrInst>(&I)) {
657+
const Type *SourceType = GEPI->getSourceElementType();
658+
if (SeenTypes
659+
.try_emplace(SourceType,
660+
IsFP64Conversion && hasDoubleType(SourceType))
661+
.second) {
662+
const AspectsSetTy &Aspects =
663+
getAspectsFromType(SourceType, TypesWithAspects);
664+
for (int Aspect : Aspects) {
665+
if (!FP64ConvEmu || Aspect != FP64Aspect || !SeenTypes[SourceType])
666+
Result.insert(Aspect);
667+
}
668+
}
669+
}
670+
671+
// Check instruction-level metadata.
672+
if (const MDNode *InstAspects = I.getMetadata("sycl_used_aspects")) {
673+
for (const MDOperand &MDOp : InstAspects->operands()) {
674+
const Constant *C = cast<ConstantAsMetadata>(MDOp)->getValue();
675+
Result.insert(cast<ConstantInt>(C)->getSExtValue());
676+
}
677+
}
678+
}
679+
680+
return Result;
681+
}
682+
537683
/// Processes a function:
538684
/// - checks if return and argument types are using any aspects
539685
/// - checks if instructions are using any aspects
@@ -564,19 +710,20 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564710
FunctionToUsedAspects[&F].insert(Aspect);
565711
}
566712

713+
// Optimized instruction analysis with type deduplication.
714+
const AspectsSetTy InstrAspects =
715+
getAspectsFromInstructions(F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
716+
FunctionToUsedAspects[&F].insert(InstrAspects.begin(), InstrAspects.end());
717+
718+
// Build call graph.
567719
for (Instruction &I : instructions(F)) {
568-
const AspectsSetTy Aspects =
569-
getAspectsUsedByInstruction(I, TypesWithAspects);
570-
for (const auto &Aspect : Aspects)
571-
if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType(I) ||
572-
!isFP64ConversionInstruction(I))
573-
FunctionToUsedAspects[&F].insert(Aspect);
574720
if (const auto *CI = dyn_cast<CallInst>(&I)) {
575721
if (!CI->isIndirectCall() && CI->getCalledFunction())
576722
CG[&F].insert(CI->getCalledFunction());
577723
}
578724
}
579725

726+
// Collect aspects from metadata (combined to reduce lookups).
580727
auto CollectAspectsFromMD = [&F](const char* MDName, FunctionToAspectsMapTy &Map) {
581728
if (const MDNode *MD = F.getMetadata(MDName)) {
582729
AspectsSetTy Aspects;
@@ -696,23 +843,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696843
collectVirtualFunctionSetInfo(F, VirtualFunctionSets);
697844
}
698845

846+
// Compute topological order once for both propagation passes.
847+
std::vector<Function *> TopoOrder = getTopologicalOrder(CG, EntryPoints);
848+
849+
// Handle virtual function sets (still needs old recursive propagation)
699850
SmallPtrSet<const Function *, 16> Visited;
700851
for (Function *F : EntryPoints) {
701-
propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited);
702852
processDeclaredVirtualFunctionSets(F, CG, FunctionToUsedAspects, Visited,
703853
VirtualFunctionSets);
704854
}
705855

856+
// Optimized single-pass propagation for used aspects.
857+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToUsedAspects);
858+
706859
if (ValidateAspects)
707860
validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues,
708861
EntryPoints, CG);
709862

710-
// The set of aspects from FunctionToDeclaredAspects should be merged to the
711-
// set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-
// avoid errors during validation.
713-
Visited.clear();
714-
for (Function *F : EntryPoints)
715-
propagateAspectsThroughCG(F, CG, FunctionToDeclaredAspects, Visited);
863+
// Optimized single-pass propagation for declared aspects.
864+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToDeclaredAspects);
716865

717866
return {std::move(FunctionToUsedAspects),
718867
std::move(FunctionToDeclaredAspects)};

0 commit comments

Comments
 (0)