@@ -17,6 +17,8 @@ SPDX-License-Identifier: MIT
17
17
#include " Compiler/IGCPassSupport.h"
18
18
#include " SynchronizationObjectCoalescing.hpp"
19
19
#include " visa_igc_common_header.h"
20
+ #include " llvm/IR/IRBuilder.h"
21
+ #include " llvm/Analysis/CFG.h"
20
22
#include < utility>
21
23
#include < map>
22
24
@@ -285,6 +287,9 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
285
287
static_cast <SynchronizationCaseMask>(WriteSyncRead | WriteSyncWrite | AtomicSyncRead | AtomicSyncWrite |
286
288
WriteSyncAtomic | ReadSyncAtomic | ReadSyncWrite | AtomicSyncAtomic);
287
289
290
+ // //////////////////////////////////////////////////////////////////////
291
+ void CreateSourceValueInst (std::vector<llvm::Instruction *> &pAtomicInstToBeSourced, llvm::Instruction *pFenceInst);
292
+
288
293
// //////////////////////////////////////////////////////////////////////
289
294
void EraseRedundantInst (llvm::Instruction *pInst);
290
295
@@ -327,6 +332,7 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
327
332
328
333
// //////////////////////////////////////////////////////////////////////
329
334
bool IsRequiredForAtomicOperationsOrdering (const llvm::Instruction *pSourceInst,
335
+ std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
330
336
bool onlyGlobalAtomics = false ) const ;
331
337
332
338
// //////////////////////////////////////////////////////////////////////
@@ -440,6 +446,7 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass {
440
446
std::vector<llvm::Instruction *> m_LscMemoryFences;
441
447
std::vector<llvm::Instruction *> m_UntypedMemoryFences;
442
448
std::vector<llvm::Instruction *> m_ThreadGroupBarriers;
449
+ std::unordered_set<llvm::Instruction *> m_SourcedAtomicInstructions;
443
450
444
451
// this variable holds a mapping from a basic block to its memory instructions ordered by their occurrences in it
445
452
// (the initial index of line of this basic block - the number of instructions preceding an instruction in its basic
@@ -538,6 +545,107 @@ bool SynchronizationObjectCoalescing::ProcessFunction() {
538
545
return FindRedundancies ();
539
546
}
540
547
548
+ // Referenced from MemoryModelPass
549
+ inline PHINode *FindDominatingPhi (DominatorTree &DT, Instruction *def, BasicBlock *postDominator) {
550
+ IGC_ASSERT (def->getParent () != postDominator);
551
+ IGC_ASSERT (!DT.dominates (def, postDominator));
552
+ SmallPtrSet<PHINode *, 8 > seen;
553
+ SmallVector<User *, 8 > worklist (def->users ());
554
+ while (!worklist.empty ()) {
555
+ PHINode *phi = dyn_cast<PHINode>(worklist.pop_back_val ());
556
+ if (phi == nullptr || seen.count (phi) > 0 ) {
557
+ continue ;
558
+ }
559
+ if (phi->getParent () == postDominator || DT.dominates (phi, postDominator)) {
560
+ return phi;
561
+ }
562
+ seen.insert (phi);
563
+ }
564
+ return nullptr ;
565
+ }
566
+
567
+ // //////////////////////////////////////////////////////////////////////
568
+ // / @brief Fence Instruction responsible for only ordering of atomic Instructions
569
+ // / can be replaced with Source Value Intrinsic which will still maintain
570
+ // / the order of Instructions
571
+ void SynchronizationObjectCoalescing::CreateSourceValueInst (std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
572
+ llvm::Instruction *pFenceInst) {
573
+ IGC_ASSERT (pAtomicInstToBeSourced.size () > 0 );
574
+ // reversing the list to source the atomic instructions in the order
575
+ reverse (pAtomicInstToBeSourced.begin (), pAtomicInstToBeSourced.end ());
576
+ Function *funcPtr = GenISAIntrinsic::getDeclaration (pFenceInst->getModule (), GenISAIntrinsic::GenISA_source_value);
577
+ BasicBlock *fenceBB = pFenceInst->getParent ();
578
+
579
+ Function *F = pAtomicInstToBeSourced[0 ]->getFunction ();
580
+ DominatorTree DT (*F);
581
+ PostDominatorTree PDT (*F);
582
+ LoopInfo LI (DT);
583
+
584
+ for (llvm::Instruction *atomicInst : pAtomicInstToBeSourced) {
585
+ // Making sure that the Fence Inst is potentially reachable from the atomic Instruction.
586
+ if (!isPotentiallyReachable (atomicInst, pFenceInst, nullptr , &DT, &LI)) {
587
+ continue ;
588
+ }
589
+
590
+ BasicBlock *atomicBB = atomicInst->getParent ();
591
+ BasicBlock *fenceDominator = fenceBB;
592
+ Instruction *insertPoint = atomicBB->getTerminator ();
593
+ Value *sourceVal = cast<GenIntrinsicInst>(atomicInst);
594
+
595
+ // TODO: Determining Insert point can be improved which can postpone the source value intrinsic as long as possible.
596
+ // Similar analysis is done in FindOptimalInsertPoints() in ApplyCacheControls.cpp
597
+
598
+ // Check if fence Instruction BB post dominates atomic Instruction BB
599
+ // Else find the BB that is a predecessor of fence BB and post dominates atomic BB.
600
+ // If we don't find one, then the insert point is near the terminator of atomic BB
601
+ while (fenceDominator && fenceDominator != atomicBB) {
602
+ if (PDT.dominates (fenceDominator, atomicBB)) {
603
+ // If fence instruction is in same BB, then use fence as insert point
604
+ // Else use the terminator of fenceDominator as insert point
605
+ insertPoint = fenceBB == fenceDominator ? pFenceInst : fenceDominator->getTerminator ();
606
+ // It's possible that the atomic instruction does not dominate
607
+ // the post-dominator, find a PHI user of the atomic instruction
608
+ // that dominates the post-dominator.
609
+ if (!DT.dominates (atomicBB, fenceDominator)) {
610
+ PHINode *phi = FindDominatingPhi (DT, atomicInst, fenceDominator);
611
+ if (phi) {
612
+ sourceVal = phi;
613
+ } else {
614
+ // Fallback to inserting the source value in the basic
615
+ // block with the atomic instruction.
616
+ insertPoint = atomicBB->getTerminator ();
617
+ }
618
+ }
619
+ break ;
620
+ }
621
+ fenceDominator = fenceDominator->getSinglePredecessor ();
622
+ }
623
+ // If Fence is present in same BB as atomic, then insert at Fence Instruction
624
+ if (fenceBB == atomicBB) {
625
+ insertPoint = pFenceInst;
626
+ }
627
+
628
+ IRBuilder<> builder (insertPoint);
629
+ Type *sourceValType = sourceVal->getType ();
630
+
631
+ // Source value intrinsic accepts only i32.
632
+ if (sourceValType->isIntegerTy ()) {
633
+ sourceVal = builder.CreateZExtOrTrunc (sourceVal, builder.getInt32Ty ());
634
+ } else if (sourceValType->isFloatingPointTy ()) {
635
+ if (sourceValType->isFloatTy ()) {
636
+ sourceVal = builder.CreateBitCast (sourceVal, builder.getInt32Ty ());
637
+ } else {
638
+ sourceVal = builder.CreateFPToUI (sourceVal, builder.getInt32Ty ());
639
+ }
640
+ } else {
641
+ IGC_ASSERT_MESSAGE (0 , " Unexpected type" );
642
+ }
643
+
644
+ builder.CreateCall (funcPtr, {sourceVal});
645
+ m_SourcedAtomicInstructions.insert (atomicInst);
646
+ }
647
+ }
648
+
541
649
// //////////////////////////////////////////////////////////////////////
542
650
void SynchronizationObjectCoalescing::EraseRedundantInst (llvm::Instruction *pInst) {
543
651
bool isFence = IsFenceOperation (pInst);
@@ -740,7 +848,13 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
740
848
}
741
849
SynchronizationCaseMask referenceSyncCaseMask = GetStrictSynchronizationMask (pInst);
742
850
bool isObligatory = (syncCaseMask & referenceSyncCaseMask) != 0 ;
743
- isObligatory |= IsRequiredForAtomicOperationsOrdering (pInst, true /* onlyGlobalAtomics*/ );
851
+
852
+ std::vector<llvm::Instruction *> atomicInstToBeSourced;
853
+ if (!isObligatory) {
854
+ isObligatory =
855
+ IsRequiredForAtomicOperationsOrdering (pInst, atomicInstToBeSourced, true /* onlyGlobalAtomics*/ );
856
+ }
857
+
744
858
bool verifyUnsynchronizedInstructions = IsFenceOperation (pInst);
745
859
verifyUnsynchronizedInstructions &= (!isObligatory || syncCaseMask == ReadSyncWrite);
746
860
@@ -767,6 +881,9 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
767
881
#if _DEBUG
768
882
RegisterRedundancyExplanation (pInst, ExplanationEntry::GlobalMemoryRedundancy);
769
883
#endif // _DEBUG
884
+ if (IGC_IS_FLAG_ENABLED (ReplaceAtomicFenceWithSourceValue) && atomicInstToBeSourced.size () > 0 ) {
885
+ CreateSourceValueInst (atomicInstToBeSourced, const_cast <Instruction *>(pInst));
886
+ }
770
887
EraseRedundantGlobalScope (pInst);
771
888
isModified = true ;
772
889
SetLocalMemoryInstructionMask ();
@@ -831,7 +948,12 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
831
948
GetSynchronizationMaskForAllResources (localForwardMemoryInstructionMask, localBackwardMemoryInstructionMask);
832
949
SynchronizationCaseMask referenceSyncCaseMask = GetStrictSynchronizationMask (pInst);
833
950
bool isObligatory = (syncCaseMask & referenceSyncCaseMask) != 0 ;
834
- isObligatory |= IsRequiredForAtomicOperationsOrdering (pInst);
951
+
952
+ std::vector<llvm::Instruction *> atomicInstToBeSourced;
953
+ if (!isObligatory) {
954
+ isObligatory = IsRequiredForAtomicOperationsOrdering (pInst, atomicInstToBeSourced);
955
+ }
956
+
835
957
bool verifyUnsynchronizedInstructions = IsFenceOperation (pInst);
836
958
verifyUnsynchronizedInstructions &= (!isObligatory || syncCaseMask == ReadSyncWrite);
837
959
@@ -847,6 +969,9 @@ bool SynchronizationObjectCoalescing::FindRedundancies() {
847
969
#if _DEBUG
848
970
RegisterRedundancyExplanation (pInst, ExplanationEntry::StrictRedundancy);
849
971
#endif // _DEBUG
972
+ if (IGC_IS_FLAG_ENABLED (ReplaceAtomicFenceWithSourceValue) && atomicInstToBeSourced.size () > 0 ) {
973
+ CreateSourceValueInst (atomicInstToBeSourced, const_cast <Instruction *>(pInst));
974
+ }
850
975
EraseRedundantInst (pInst);
851
976
isModified = true ;
852
977
}
@@ -1731,8 +1856,9 @@ SynchronizationObjectCoalescing::GetUnsynchronizedForwardInstructionMask(const l
1731
1856
// / operations present before the fence (in program order)
1732
1857
// / @param pSourceInst the source synchronization instruction
1733
1858
// / @param onlyGlobalAtomics check only TGM and UGM atomic operations
1734
- bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering (const llvm::Instruction *pSourceInst,
1735
- bool onlyGlobalAtomics /* = false*/ ) const {
1859
+ bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering (
1860
+ const llvm::Instruction *pSourceInst, std::vector<llvm::Instruction *> &pAtomicInstToBeSourced,
1861
+ bool onlyGlobalAtomics /* = false*/ ) const {
1736
1862
if (!IsFenceOperation (pSourceInst)) {
1737
1863
// Not a fence, nothing to check
1738
1864
return false ;
@@ -1782,6 +1908,10 @@ bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(cons
1782
1908
{
1783
1909
isPotentiallyUnsynchronizedAtomic = false ;
1784
1910
// Lambda that checks if a fence operation synchronizes the atomic operation.
1911
+ // This can be improved to detect the users of atomic instruction and end the search for fences once we find the
1912
+ // user. This user is essentially same as Source Value Intrinsic, however it can be reordered in visa affecting
1913
+ // the execution order of atomic instructions. If we can find a way to treat this user as a special instruction
1914
+ // and avoid reordering, we can skip creating new source value instruction.
1785
1915
std::function<bool (const llvm::Instruction *)> IsBoundaryInst = [this , &atomicPointerMemoryInstructionMask,
1786
1916
&isPotentiallyUnsynchronizedAtomic,
1787
1917
pSourceInst](const llvm::Instruction *pInst) {
@@ -1840,7 +1970,22 @@ bool SynchronizationObjectCoalescing::IsRequiredForAtomicOperationsOrdering(cons
1840
1970
if (!substituteFenceFound) {
1841
1971
// Found an atomic operation that requires the source fence
1842
1972
// instruction for correct memory ordering.
1843
- return true ;
1973
+
1974
+ // If ReplaceAtomicFenceWithSourceValue is true, we can replace this fence with GenISA_source_value.
1975
+ // This will source the atomic instruction and still maintains the order of atomic instructions.
1976
+ // Else return true marking the fence instruction as Obligatory.
1977
+
1978
+ if (IGC_IS_FLAG_ENABLED (ReplaceAtomicFenceWithSourceValue)) {
1979
+ // If a previous fence was replaced with source value intrinsic, GetVisibleMemoryInstructions will add the
1980
+ // same atomic instruction again for the next fence resulting in multiple source value intrinsics but we need
1981
+ // it to be sourced only once. Hence we check if it was already sourced previously. Continues to check all
1982
+ // valid atomic Instructions to be sourced.
1983
+ if (m_SourcedAtomicInstructions.find (const_cast <Instruction *>(pInst)) == m_SourcedAtomicInstructions.end ()) {
1984
+ pAtomicInstToBeSourced.push_back (const_cast <Instruction *>(pInst));
1985
+ }
1986
+ } else {
1987
+ return true ;
1988
+ }
1844
1989
}
1845
1990
}
1846
1991
}
@@ -2002,6 +2147,7 @@ void SynchronizationObjectCoalescing::InvalidateMembers() {
2002
2147
m_OrderedFenceInstructionsInBasicBlockCache.clear ();
2003
2148
m_OrderedBarrierInstructionsInBasicBlockCache.clear ();
2004
2149
m_BasicBlockMemoryInstructionMaskCache.clear ();
2150
+ m_SourcedAtomicInstructions.clear ();
2005
2151
#if _DEBUG
2006
2152
m_ExplanationEntries.clear ();
2007
2153
#endif // _DEBUG
0 commit comments