@@ -348,6 +348,28 @@ AspectsSetTy getAspectsUsedByInstruction(const Instruction &I,
348348  return  Result;
349349}
350350
351+ // / Collects aspects from all instructions in a function.
352+ // / Applies FP64 conversion emulation filtering per-instruction.
353+ AspectsSetTy getAspectsFromInstructions (Function &F,
354+                                         TypeToAspectsMapTy &TypesWithAspects,
355+                                         int  FP64Aspect, bool  FP64ConvEmu) {
356+   AspectsSetTy Result;
357+ 
358+   for  (Instruction &I : instructions (F)) {
359+     bool  IsFP64Conversion = FP64ConvEmu && isFP64ConversionInstruction (I);
360+     bool  HasDouble = hasDoubleType (I);
361+ 
362+     const  AspectsSetTy Aspects = getAspectsUsedByInstruction (I, TypesWithAspects);
363+ 
364+     for  (int  Aspect : Aspects) {
365+       if  (!FP64ConvEmu || Aspect != FP64Aspect || !HasDouble || !IsFP64Conversion)
366+         Result.insert (Aspect);
367+     }
368+   }
369+ 
370+   return  Result;
371+ }
372+ 
351373using  FunctionToAspectsMapTy = DenseMap<Function *, AspectsSetTy>;
352374using  CallGraphTy = DenseMap<Function *, SmallPtrSet<Function *, 8 >>;
353375
@@ -511,7 +533,54 @@ void validateUsedAspectsForFunctions(const FunctionToAspectsMapTy &Map,
511533  }
512534}
513535
536+ // / Computes topological order of functions in the call graph.
537+ // / Returns functions in reverse topological order.
538+ // / This allows single-pass bottom-up propagation.
539+ std::vector<Function *>
540+ getTopologicalOrder (const  CallGraphTy &CG,
541+                     const  std::vector<Function *> &EntryPoints) {
542+   std::vector<Function *> Result;
543+   DenseMap<const  Function *, unsigned > InDegree;
544+ 
545+   //  Build reverse call graph and compute in-degrees.
546+   DenseMap<Function *, SmallVector<Function *, 4 >> ReverseCG;
547+   SmallPtrSet<Function *, 32 > AllFunctions;
548+   for  (const  auto  &[Caller, Callees] : CG) {
549+     AllFunctions.insert (Caller);
550+     for  (Function *Callee : Callees) {
551+       AllFunctions.insert (Callee);
552+       ReverseCG[Callee].push_back (Caller);
553+       InDegree[Caller]++;
554+     }
555+   }
556+ 
557+   //  Start with leaf functions.
558+   std::queue<Function *> Worklist;
559+   for  (Function *F : AllFunctions) {
560+     if  (InDegree[F] == 0 )
561+       Worklist.push (F);
562+   }
563+ 
564+   //  Kahn's algorithm for topological sort.
565+   while  (!Worklist.empty ()) {
566+     Function *F = Worklist.front ();
567+     Worklist.pop ();
568+     Result.push_back (F);
569+ 
570+     auto  It = ReverseCG.find (F);
571+     if  (It != ReverseCG.end ()) {
572+       for  (Function *Caller : It->second ) {
573+         if  (--InDegree[Caller] == 0 )
574+           Worklist.push (Caller);
575+       }
576+     }
577+   }
578+ 
579+   return  Result;
580+ }
581+ 
514582// / Propagates aspects from leaves up to the top of call graph.
583+ // / Uses topological sort for efficient single-pass propagation.
515584// / NB! Call graph corresponds to call graph of SYCL code which
516585// / can't contain recursive calls. So there can't be loops in
517586// / a call graph. But there can be path's intersections.
@@ -534,6 +603,24 @@ void propagateAspectsThroughCG(Function *F, CallGraphTy &CG,
534603  AspectsMap[F].insert (LocalAspects.begin (), LocalAspects.end ());
535604}
536605
606+ // / Processes each function exactly once in bottom-up order.
607+ void  propagateAspectsThroughCGOptimized (
608+     const  std::vector<Function *> &TopoOrder, const  CallGraphTy &CG,
609+     FunctionToAspectsMapTy &AspectsMap) {
610+   //  Process in topological order.
611+   for  (Function *F : TopoOrder) {
612+     auto  It = CG.find (F);
613+     if  (It == CG.end ())
614+       continue ;
615+ 
616+     //  Merge aspects from all callees.
617+     for  (Function *Callee : It->second ) {
618+       const  auto  &CalleeAspects = AspectsMap[Callee];
619+       AspectsMap[F].insert (CalleeAspects.begin (), CalleeAspects.end ());
620+     }
621+   }
622+ }
623+ 
537624// / Processes a function:
538625// /  - checks if return and argument types are using any aspects
539626// /  - checks if instructions are using any aspects
@@ -564,19 +651,19 @@ void processFunction(Function &F, FunctionToAspectsMapTy &FunctionToUsedAspects,
564651        FunctionToUsedAspects[&F].insert (Aspect);
565652  }
566653
654+   const  AspectsSetTy InstrAspects =
655+       getAspectsFromInstructions (F, TypesWithAspects, FP64Aspect, FP64ConvEmu);
656+   FunctionToUsedAspects[&F].insert (InstrAspects.begin (), InstrAspects.end ());
657+ 
658+   //  Build call graph.
567659  for  (Instruction &I : instructions (F)) {
568-     const  AspectsSetTy Aspects =
569-         getAspectsUsedByInstruction (I, TypesWithAspects);
570-     for  (const  auto  &Aspect : Aspects)
571-       if  (!FP64ConvEmu || (Aspect != FP64Aspect) || !hasDoubleType (I) ||
572-           !isFP64ConversionInstruction (I))
573-         FunctionToUsedAspects[&F].insert (Aspect);
574660    if  (const  auto  *CI = dyn_cast<CallInst>(&I)) {
575661      if  (!CI->isIndirectCall () && CI->getCalledFunction ())
576662        CG[&F].insert (CI->getCalledFunction ());
577663    }
578664  }
579665
666+   //  Collect aspects from metadata (combined to reduce lookups).
580667  auto  CollectAspectsFromMD = [&F](const  char * MDName, FunctionToAspectsMapTy &Map) {
581668    if  (const  MDNode *MD = F.getMetadata (MDName)) {
582669      AspectsSetTy Aspects;
@@ -696,23 +783,25 @@ buildFunctionsToAspectsMap(Module &M, TypeToAspectsMapTy &TypesWithAspects,
696783    collectVirtualFunctionSetInfo (F, VirtualFunctionSets);
697784  }
698785
786+   //  Compute topological order once for both propagation passes.
787+   std::vector<Function *> TopoOrder = getTopologicalOrder (CG, EntryPoints);
788+ 
789+   //  Handle virtual function sets (still needs old recursive propagation)
699790  SmallPtrSet<const  Function *, 16 > Visited;
700791  for  (Function *F : EntryPoints) {
701-     propagateAspectsThroughCG (F, CG, FunctionToUsedAspects, Visited);
702792    processDeclaredVirtualFunctionSets (F, CG, FunctionToUsedAspects, Visited,
703793                                       VirtualFunctionSets);
704794  }
705795
796+   //  Optimized single-pass propagation for used aspects.
797+   propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToUsedAspects);
798+ 
706799  if  (ValidateAspects)
707800    validateUsedAspectsForFunctions (FunctionToUsedAspects, AspectValues,
708801                                    EntryPoints, CG);
709802
710-   //  The set of aspects from FunctionToDeclaredAspects should be merged to the
711-   //  set of FunctionToUsedAspects after validateUsedAspectsForFunctions call to
712-   //  avoid errors during validation.
713-   Visited.clear ();
714-   for  (Function *F : EntryPoints)
715-     propagateAspectsThroughCG (F, CG, FunctionToDeclaredAspects, Visited);
803+   //  Optimized single-pass propagation for declared aspects.
804+   propagateAspectsThroughCGOptimized (TopoOrder, CG, FunctionToDeclaredAspects);
716805
717806  return  {std::move (FunctionToUsedAspects),
718807          std::move (FunctionToDeclaredAspects)};
0 commit comments