Skip to content

Commit 5dc166f

Browse files
committed
[SYCL] Rewrite aspect propagation function lookup using toposort
Signed-off-by: Dmitry Sidorov <[email protected]>
1 parent 3ccc8ec commit 5dc166f

File tree

1 file changed

+102
-13
lines changed

1 file changed

+102
-13
lines changed

llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp

Lines changed: 102 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,28 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I,
348348
return Result;
349349
}
350350

351+
/// Collects aspects from all instructions in a function.
352+
/// Applies FP64 conversion emulation filtering per-instruction.
353+
AspectsSetTy getAspectsFromInstructions(Function &F,
354+
TypeToAspectsMapTy &TypesWithAspects,
355+
int FP64Aspect, bool FP64ConvEmu) {
356+
AspectsSetTy Result;
357+
358+
for (Instruction &I : instructions(F)) {
359+
bool IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction(I);
360+
bool HasDouble = hasDoubleType(I);
361+
362+
const AspectsSetTy Aspects = getAspectsUsedByInstruction(I, TypesWithAspects);
363+
364+
for (int Aspect : Aspects) {
365+
if (!FP64ConvEmu || Aspect != FP64Aspect || !HasDouble || !IsFP64Conversion)
366+
Result.insert(Aspect);
367+
}
368+
}
369+
370+
return Result;
371+
}
372+
351373
using FunctionToAspectsMapTy = DenseMap<Function *, AspectsSetTy>;
352374
using CallGraphTy = DenseMap<Function *, SmallPtrSet<Function *, 8>>;
353375

@@ -511,7 +533,54 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511533
}
512534
}
513535

536+
/// Computes topological order of functions in the call graph.
537+
/// Returns functions in reverse topological order.
538+
/// This allows single-pass bottom-up propagation.
539+
std::vector<Function *>
540+
getTopologicalOrder(const CallGraphTy &CG,
541+
const std::vector<Function *> &EntryPoints) {
542+
std::vector<Function *> Result;
543+
DenseMap<const Function *, unsigned> InDegree;
544+
545+
// Build reverse call graph and compute in-degrees.
546+
DenseMap<Function *, SmallVector<Function *, 4>> ReverseCG;
547+
SmallPtrSet<Function *, 32> AllFunctions;
548+
for (const auto &[Caller, Callees] : CG) {
549+
AllFunctions.insert(Caller);
550+
for (Function *Callee : Callees) {
551+
AllFunctions.insert(Callee);
552+
ReverseCG[Callee].push_back(Caller);
553+
InDegree[Caller]++;
554+
}
555+
}
556+
557+
// Start with leaf functions.
558+
std::queue<Function *> Worklist;
559+
for (Function *F : AllFunctions) {
560+
if (InDegree[F] == 0)
561+
Worklist.push(F);
562+
}
563+
564+
// Kahn's algorithm for topological sort.
565+
while (!Worklist.empty()) {
566+
Function *F = Worklist.front();
567+
Worklist.pop();
568+
Result.push_back(F);
569+
570+
auto It = ReverseCG.find(F);
571+
if (It != ReverseCG.end()) {
572+
for (Function *Caller : It->second) {
573+
if (--InDegree[Caller] == 0)
574+
Worklist.push(Caller);
575+
}
576+
}
577+
}
578+
579+
return Result;
580+
}
581+
514582
/// Propagates aspects from leaves up to the top of call graph.
583+
/// This is the recursive variant; the single-pass version driven by a
/// topological order is propagateAspectsThroughCGOptimized.
515584
/// NB! Call graph corresponds to call graph of SYCL code which
516585
/// can't contain recursive calls. So there can't be loops in
517586
/// a call graph. However, different paths may intersect.
@@ -534,6 +603,24 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534603
AspectsMap[F].insert(LocalAspects.begin(), LocalAspects.end());
535604
}
536605

606+
/// Processes each function exactly once in bottom-up order.
607+
void propagateAspectsThroughCGOptimized(
608+
const std::vector<Function *> &TopoOrder, const CallGraphTy &CG,
609+
FunctionToAspectsMapTy &AspectsMap) {
610+
// Process in topological order.
611+
for (Function *F : TopoOrder) {
612+
auto It = CG.find(F);
613+
if (It == CG.end())
614+
continue;
615+
616+
// Merge aspects from all callees.
617+
for (Function *Callee : It->second) {
618+
const auto &CalleeAspects = AspectsMap[Callee];
619+
AspectsMap[F].insert(CalleeAspects.begin(), CalleeAspects.end());
620+
}
621+
}
622+
}
623+
537624
/// Processes a function:
538625
/// - checks if return and argument types are using any aspects
539626
/// - checks if instructions are using any aspects
@@ -564,19 +651,19 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564651
FunctionToUsedAspects[&F].insert(Aspect);
565652
}
566653

654+
const AspectsSetTy InstrAspects =
655+
getAspectsFromInstructions(F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
656+
FunctionToUsedAspects[&F].insert(InstrAspects.begin(), InstrAspects.end());
657+
658+
// Build call graph.
567659
for (Instruction &I : instructions(F)) {
568-
const AspectsSetTy Aspects =
569-
getAspectsUsedByInstruction(I, TypesWithAspects);
570-
for (const auto &Aspect : Aspects)
571-
if (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType(I) ||
572-
!isFP64ConversionInstruction(I))
573-
FunctionToUsedAspects[&F].insert(Aspect);
574660
if (const auto *CI = dyn_cast<CallInst>(&I)) {
575661
if (!CI->isIndirectCall() && CI->getCalledFunction())
576662
CG[&F].insert(CI->getCalledFunction());
577663
}
578664
}
579665

666+
// Collect aspects from metadata (combined to reduce lookups).
580667
auto CollectAspectsFromMD = [&F](const char* MDName, FunctionToAspectsMapTy &Map) {
581668
if (const MDNode *MD = F.getMetadata(MDName)) {
582669
AspectsSetTy Aspects;
@@ -696,23 +783,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696783
collectVirtualFunctionSetInfo(F, VirtualFunctionSets);
697784
}
698785

786+
// Compute topological order once for both propagation passes.
787+
std::vector<Function *> TopoOrder = getTopologicalOrder(CG, EntryPoints);
788+
789+
// Handle virtual function sets (still needs old recursive propagation)
699790
SmallPtrSet<const Function *, 16> Visited;
700791
for (Function *F : EntryPoints) {
701-
propagateAspectsThroughCG(F, CG, FunctionToUsedAspects, Visited);
702792
processDeclaredVirtualFunctionSets(F, CG, FunctionToUsedAspects, Visited,
703793
VirtualFunctionSets);
704794
}
705795

796+
// Optimized single-pass propagation for used aspects.
797+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToUsedAspects);
798+
706799
if (ValidateAspects)
707800
validateUsedAspectsForFunctions(FunctionToUsedAspects, AspectValues,
708801
EntryPoints, CG);
709802

710-
// The set of aspects from FunctionToDeclaredAspects should be merged to the
711-
// set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-
// avoid errors during validation.
713-
Visited.clear();
714-
for (Function *F : EntryPoints)
715-
propagateAspectsThroughCG(F, CG, FunctionToDeclaredAspects, Visited);
803+
// Optimized single-pass propagation for declared aspects.
804+
propagateAspectsThroughCGOptimized(TopoOrder, CG, FunctionToDeclaredAspects);
716805

717806
return {std::move(FunctionToUsedAspects),
718807
std::move(FunctionToDeclaredAspects)};

0 commit comments

Comments
 (0)