diff --git a/lib/SILOptimizer/LoopTransforms/LICM.cpp b/lib/SILOptimizer/LoopTransforms/LICM.cpp
index 6013b23658033..7d015e4592d15 100644
--- a/lib/SILOptimizer/LoopTransforms/LICM.cpp
+++ b/lib/SILOptimizer/LoopTransforms/LICM.cpp
@@ -714,10 +714,13 @@ bool LoopTreeOptimization::isSafeReadOnlyApply(BasicCalleeAnalysis *BCA, ApplyIn
 
 static void checkSideEffects(swift::SILInstruction &Inst,
                              InstSet &SideEffectInsts,
-                             SmallVectorImpl<SILInstruction *> &sideEffectsInBlock) {
+                             SmallVectorImpl<SILInstruction *> &sideEffectsInBlock,
+                             bool &hasOtherMemReadingInsts) {
   if (Inst.mayHaveSideEffects()) {
     SideEffectInsts.insert(&Inst);
     sideEffectsInBlock.push_back(&Inst);
+  } else if (Inst.mayReadFromMemory()) {
+    hasOtherMemReadingInsts = true;
   }
 }
 
@@ -885,11 +888,15 @@ void LoopTreeOptimization::analyzeCurrentLoop(
   SmallVector<BeginAccessInst *, 8> BeginAccesses;
   SmallVector<FullApplySite, 8> fullApplies;
 
+  // True if the loop has instructions which (may) read from memory, which are not
+  // in `Loads` and not in `sideEffects`.
+  bool hasOtherMemReadingInsts = false;
+
   for (auto *BB : Loop->getBlocks()) {
     SmallVector<SILInstruction *, 8> sideEffectsInBlock;
     for (auto &Inst : *BB) {
       if (hasOwnershipOperandsOrResults(&Inst)) {
-        checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+        checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
         // Collect fullApplies to be checked in analyzeBeginAccess
         if (auto fullApply = FullApplySite::isa(&Inst)) {
           fullApplies.push_back(fullApply);
@@ -921,12 +928,12 @@ void LoopTreeOptimization::analyzeCurrentLoop(
       }
       Stores.push_back(store);
       LoadsAndStores.push_back(&Inst);
-      checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+      checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
       break;
     }
     case SILInstructionKind::BeginAccessInst:
       BeginAccesses.push_back(cast<BeginAccessInst>(&Inst));
-      checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+      checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
       break;
     case SILInstructionKind::RefElementAddrInst:
       SpecialHoist.push_back(cast<RefElementAddrInst>(&Inst));
       break;
@@ -937,7 +944,7 @@
       // cond_fail that would have protected (executed before) a memory access
       // must - after hoisting - also be executed before said access.
       HoistUp.insert(&Inst);
-      checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+      checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
       break;
     case SILInstructionKind::ApplyInst: {
       auto *AI = cast<ApplyInst>(&Inst);
@@ -971,7 +978,7 @@
         }
       }
 
-      checkSideEffects(Inst, sideEffects, sideEffectsInBlock);
+      checkSideEffects(Inst, sideEffects, sideEffectsInBlock, hasOtherMemReadingInsts);
       if (canHoistUpDefault(&Inst, Loop, DomTree, RunsOnHighLevelSIL)) {
         HoistUp.insert(&Inst);
       }
@@ -1013,23 +1020,25 @@
     }
   }
 
-  // Collect memory locations for which we can move all loads and stores out
-  // of the loop.
-  //
-  // Note: The Loads set and LoadsAndStores set may mutate during this loop.
-  for (StoreInst *SI : Stores) {
-    // Use AccessPathWithBase to recover a base address that can be used for
-    // newly inserted memory operations. If we instead teach hoistLoadsAndStores
-    // how to rematerialize global_addr, then we don't need this base.
-    auto access = AccessPathWithBase::compute(SI->getDest());
-    auto accessPath = access.accessPath;
-    if (accessPath.isValid() &&
-        (access.base && isLoopInvariant(access.base, Loop))) {
-      if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
-                                accessPath)) {
-        if (!LoadAndStoreAddrs.count(accessPath)) {
-          if (splitLoads(Loads, accessPath, SI->getDest())) {
-            LoadAndStoreAddrs.insert(accessPath);
+  if (!hasOtherMemReadingInsts) {
+    // Collect memory locations for which we can move all loads and stores out
+    // of the loop.
+    //
+    // Note: The Loads set and LoadsAndStores set may mutate during this loop.
+    for (StoreInst *SI : Stores) {
+      // Use AccessPathWithBase to recover a base address that can be used for
+      // newly inserted memory operations. If we instead teach hoistLoadsAndStores
+      // how to rematerialize global_addr, then we don't need this base.
+      auto access = AccessPathWithBase::compute(SI->getDest());
+      auto accessPath = access.accessPath;
+      if (accessPath.isValid() &&
+          (access.base && isLoopInvariant(access.base, Loop))) {
+        if (isOnlyLoadedAndStored(AA, sideEffects, Loads, Stores, SI->getDest(),
+                                  accessPath)) {
+          if (!LoadAndStoreAddrs.count(accessPath)) {
+            if (splitLoads(Loads, accessPath, SI->getDest())) {
+              LoadAndStoreAddrs.insert(accessPath);
+            }
           }
         }
       }
diff --git a/test/SILOptimizer/licm.sil b/test/SILOptimizer/licm.sil
index 4955174801c91..feaed871317b7 100644
--- a/test/SILOptimizer/licm.sil
+++ b/test/SILOptimizer/licm.sil
@@ -18,6 +18,11 @@ struct NonCopyable : ~Copyable {
   var x: Int
 }
 
+struct S {
+  var i: Int
+  var s: String
+}
+
 // globalArray
 sil_global @globalArray : $Storage
 
@@ -1667,3 +1672,35 @@
 bb3:
   return %r : $()
 }
+
+// CHECK-LABEL: sil [ossa] @store_and_load_borrow :
+// CHECK:       bb1({{.*}}):
+// CHECK:         store %1 to [trivial]
+// CHECK:         load_borrow
+// CHECK:       bb2:
+// CHECK-LABEL: } // end sil function 'store_and_load_borrow'
+sil [ossa] @store_and_load_borrow : $@convention(thin) (@inout S, Int) -> () {
+bb0(%0 : $*S, %1 : $Int):
+  %2 = load_borrow %0
+  %3 = struct_element_addr %0, #S.i
+  br bb1(%2)
+
+bb1(%4 : @reborrow $S):
+  %5 = borrowed %4 from ()
+  end_borrow %5
+  store %1 to [trivial] %3
+  %10 = load_borrow %0
+  cond_br undef, bb2, bb3
+
+bb2:
+  br bb1(%10)
+
+bb3:
+  br bb4(%10)
+
+bb4(%14 : @reborrow $S):
+  %15 = borrowed %14 from ()
+  end_borrow %15
+  %r = tuple ()
+  return %r
+}
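A sketch (not part of the patch) of the miscompile that the new `hasOtherMemReadingInsts` gating prevents, using the test case above. In OSSA, `load_borrow` may read memory but has no side effects, so before this change it landed neither in `Loads` nor in `sideEffects`; `isOnlyLoadedAndStored` therefore never saw it, and load/store promotion could move the store past the borrow:

```sil
// Loop body as written: the load_borrow of the whole struct must observe the
// store to the S.i field from the same iteration.
bb1(%4 : @reborrow $S):
  // ...
  store %1 to [trivial] %3   // writes S.i
  %10 = load_borrow %0       // reads all of S, including S.i
  cond_br undef, bb2, bb3

// Roughly what pre-fix promotion could produce: the stored value is kept in
// SSA form across the loop and the store is sunk into the exit block, so the
// load_borrow inside the loop reads a stale S.i.
bb1(%4 : @reborrow $S):
  // ...
  %10 = load_borrow %0       // no longer sees the store
  cond_br undef, bb2, bb3

bb3:
  store %1 to [trivial] %3   // sunk out of the loop
  // ...
```

With the flag set, `analyzeCurrentLoop` skips the promotion loop entirely, and the CHECK lines pin both the `store` and the `load_borrow` inside `bb1`.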