
LICM: handle memory dependency for store sinking correctly #81167

Merged: 1 commit, Apr 30, 2025
lib/SILOptimizer/LoopTransforms/LICM.cpp (55 changes: 32 additions & 23 deletions)
@@ -714,10 +714,13 @@ bool LoopTreeOptimization::isSafeReadOnlyApply(BasicCalleeAnalysis *BCA, ApplyIn
 
 static void checkSideEffects(swift::SILInstruction &Inst,
                              InstSet &SideEffectInsts,
-                             SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
+                             SmallVectorImpl<SILInstruction *> &sideEffectsInBlock,
+                             bool &hasOtherMemReadingInsts) {
   if (Inst.mayHaveSideEffects()) {
     SideEffectInsts.insert(&Inst);
     sideEffectsInBlock.push_back(&Inst);
+  } else if (Inst.mayReadFromMemory()) {
+    hasOtherMemReadingInsts = true;
   }
 }
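
The shape of the change: checkSideEffects now sorts every instruction it sees into one of three buckets. Side-effecting instructions keep being recorded individually (so later phases can reason about them precisely), loads and stores are tracked by the callers in `Loads`/`Stores`, and anything else that may read memory merely flips one conservative bit. A minimal standalone sketch of that classification, with a toy `Inst` type standing in for `SILInstruction` (illustrative only, not the actual pass):

#include <vector>

// Toy stand-ins for SILInstruction and InstSet; illustrative only.
struct Inst {
  bool hasSideEffects = false;
  bool readsMemory = false;
  bool mayHaveSideEffects() const { return hasSideEffects; }
  bool mayReadFromMemory() const { return readsMemory; }
};
using InstSet = std::vector<Inst *>;

// Mirrors the patched checkSideEffects: a side-effecting instruction is
// recorded individually, while an effect-free instruction that may still
// read memory only sets a single conservative flag. One bit suffices
// because the store-sinking optimization is all-or-nothing per loop.
static void classify(Inst &inst, InstSet &sideEffectInsts,
                     std::vector<Inst *> &sideEffectsInBlock,
                     bool &hasOtherMemReadingInsts) {
  if (inst.mayHaveSideEffects()) {
    sideEffectInsts.push_back(&inst);
    sideEffectsInBlock.push_back(&inst);
  } else if (inst.mayReadFromMemory()) {
    hasOtherMemReadingInsts = true;
  }
}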

@@ -885,11 +888,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
   SmallVector<BeginAccessInst *, 8> BeginAccesses;
   SmallVector<FullApplySite, 8> fullApplies;
 
+  // True if the loop has instructions which (may) read from memory, which are not
+  // in `Loads` and not in `sideEffects`.
+  bool hasOtherMemReadingInsts = false;
+
   for (auto *BB : Loop->getBlocks()) {
     SmallVector<SILInstruction *, 8> sideEffectsInBlock;
     for (auto &Inst : *BB) {
       if (hasOwnershipOperandsOrResults(&Inst)) {
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         // Collect fullApplies to be checked in analyzeBeginAccess
         if (auto fullApply = FullApplySite::isa(&Inst)) {
           fullApplies.push_back(fullApply);
@@ -921,12 +928,12 @@
         }
         Stores.push_back(store);
         LoadsAndStores.push_back(&Inst);
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         break;
       }
       case SILInstructionKind::BeginAccessInst:
         BeginAccesses.push_back(cast<BeginAccessInst>(&Inst));
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         break;
       case SILInstructionKind::RefElementAddrInst:
         SpecialHoist.push_back(cast<RefElementAddrInst>(&Inst));
@@ -937,7 +944,7 @@
         // cond_fail that would have protected (executed before) a memory access
         // must - after hoisting - also be executed before said access.
         HoistUp.insert(&Inst);
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         break;
       case SILInstructionKind::ApplyInst: {
        auto *AI = cast<ApplyInst>(&Inst);
@@ -971,7 +978,7 @@
           }
         }
 
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         if (canHoistUpDefault(&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
           HoistUp.insert(&Inst);
         }
@@ -1013,23 +1020,25 @@
     }
   }
 
-  // Collect memory locations for which we can move all loads and stores out
-  // of the loop.
-  //
-  // Note: The Loads set and LoadsAndStores set may mutate during this loop.
-  for (StoreInst *SI : Stores) {
-    // Use AccessPathWithBase to recover a base address that can be used for
-    // newly inserted memory operations. If we instead teach hoistLoadsAndStores
-    // how to rematerialize global_addr, then we don't need this base.
-    auto access = AccessPathWithBase::compute(SI->getDest());
-    auto accessPath = access.accessPath;
-    if (accessPath.isValid() &&
-        (access.base && isLoopInvariant(access.base, Loop))) {
-      if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
-                                accessPath)) {
-        if (!LoadAndStoreAddrs.count(accessPath)) {
-          if (splitLoads(Loads, accessPath, SI->getDest())) {
-            LoadAndStoreAddrs.insert(accessPath);
+  if (!hasOtherMemReadingInsts) {
+    // Collect memory locations for which we can move all loads and stores out
+    // of the loop.
+    //
+    // Note: The Loads set and LoadsAndStores set may mutate during this loop.
+    for (StoreInst *SI : Stores) {
+      // Use AccessPathWithBase to recover a base address that can be used for
+      // newly inserted memory operations. If we instead teach hoistLoadsAndStores
+      // how to rematerialize global_addr, then we don't need this base.
+      auto access = AccessPathWithBase::compute(SI->getDest());
+      auto accessPath = access.accessPath;
+      if (accessPath.isValid() &&
+          (access.base && isLoopInvariant(access.base, Loop))) {
+        if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
+                                  accessPath)) {
+          if (!LoadAndStoreAddrs.count(accessPath)) {
+            if (splitLoads(Loads, accessPath, SI->getDest())) {
+              LoadAndStoreAddrs.insert(accessPath);
+            }
           }
         }
       }
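
Why the guard matters: a store may only be sunk out of a loop if nothing inside the loop observes the stored-to location between iterations. An effect-free instruction such as load_borrow can read that location without appearing in `Loads` or `sideEffects`, so before this change it was invisible to isOnlyLoadedAndStored. A plain-C++ sketch of the resulting miscompile (a loop-level analogy, not compiler code; observe() is a hypothetical stand-in for an effect-free read):

#include <cstdio>

int total = 0;

// Hypothetical stand-in for an instruction that reads memory but has no
// side effects (the role load_borrow plays in the SIL test below).
static int observe() { return total; }

int main() {
  int last = 0;
  for (int i = 1; i <= 3; ++i) {
    total = i;         // store to a loop-invariant address: sinking candidate
    last = observe();  // unrecorded read of the same location
  }
  // Prints 3. If "total = i" were sunk below the loop, observe() would have
  // returned the stale value 0 on every iteration and this would print 0.
  std::printf("%d\n", last);
  return 0;
}
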
test/SILOptimizer/licm.sil (37 changes: 37 additions & 0 deletions)
@@ -18,6 +18,11 @@ struct NonCopyable : ~Copyable {
   var x: Int
 }
 
+struct S {
+  var i: Int
+  var s: String
+}
+
 // globalArray
 sil_global @globalArray : $Storage

@@ -1667,3 +1672,35 @@ bb3:
   return %r : $()
 }
 
+// CHECK-LABEL: sil [ossa] @store_and_load_borrow :
+// CHECK: bb1({{.*}}):
+// CHECK:   store %1 to [trivial]
+// CHECK:   load_borrow
+// CHECK: bb2:
+// CHECK-LABEL: } // end sil function 'store_and_load_borrow'
+sil [ossa] @store_and_load_borrow : $@convention(thin) (@inout S, Int) -> () {
+bb0(%0 : $*S, %1 : $Int):
+  %2 = load_borrow %0
+  %3 = struct_element_addr %0, #S.i
+  br bb1(%2)
+
+bb1(%4 : @reborrow $S):
+  %5 = borrowed %4 from ()
+  end_borrow %5
+  store %1 to [trivial] %3
+  %10 = load_borrow %0
+  cond_br undef, bb2, bb3
+
+bb2:
+  br bb1(%10)
+
+bb3:
+  br bb4(%10)
+
+bb4(%14 : @reborrow $S):
+  %15 = borrowed %14 from ()
+  end_borrow %15
+  %r = tuple ()
+  return %r
+}
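
The test exercises exactly this pattern: the load_borrow of %0 inside the loop block bb1 reads the struct that the store through %3 (a projection of %0) writes on every iteration. The CHECK lines pin both the store and the load_borrow inside bb1; without the new hasOtherMemReadingInsts guard, the effect-free load_borrow was invisible to the store-sinking analysis, and the store could be moved out of the loop, changing the values the borrow observes.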