From c5f74041ac5da201ce3d861ab5a93206f7603d98 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Tue, 16 Aug 2022 23:23:10 -0400 Subject: [PATCH 01/15] initial bg movers --- MultiTierDataMovement.md | 117 +++++++++++ cachelib/allocator/BackgroundMover-inl.h | 118 +++++++++++ cachelib/allocator/BackgroundMover.h | 101 +++++++++ cachelib/allocator/BackgroundMoverStrategy.h | 33 +++ cachelib/allocator/CMakeLists.txt | 1 + cachelib/allocator/Cache.h | 6 + cachelib/allocator/CacheAllocator-inl.h | 147 ++++++++++++- cachelib/allocator/CacheAllocator.h | 206 ++++++++++++++++++- cachelib/allocator/CacheAllocatorConfig.h | 63 ++++++ cachelib/allocator/CacheStats.h | 26 +++ cachelib/allocator/FreeThresholdStrategy.cpp | 67 ++++++ cachelib/allocator/FreeThresholdStrategy.h | 43 ++++ cachelib/allocator/MMLru-inl.h | 9 + cachelib/allocator/MMLru.h | 3 + cachelib/allocator/MMTinyLFU-inl.h | 8 + cachelib/allocator/MMTinyLFU.h | 3 + cachelib/allocator/PromotionStrategy.h | 81 ++++++++ cachelib/allocator/tests/CacheBaseTest.cpp | 2 + cachelib/cachebench/cache/Cache-inl.h | 34 +++ cachelib/cachebench/cache/CacheStats.h | 59 ++++++ cachelib/cachebench/util/CacheConfig.cpp | 46 ++++- cachelib/cachebench/util/CacheConfig.h | 27 +++ 22 files changed, 1176 insertions(+), 24 deletions(-) create mode 100644 MultiTierDataMovement.md create mode 100644 cachelib/allocator/BackgroundMover-inl.h create mode 100644 cachelib/allocator/BackgroundMover.h create mode 100644 cachelib/allocator/BackgroundMoverStrategy.h create mode 100644 cachelib/allocator/FreeThresholdStrategy.cpp create mode 100644 cachelib/allocator/FreeThresholdStrategy.h create mode 100644 cachelib/allocator/PromotionStrategy.h diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md new file mode 100644 index 0000000000..d116f210a0 --- /dev/null +++ b/MultiTierDataMovement.md @@ -0,0 +1,117 @@ +# Background Data Movement + +In order to reduce the number of online evictions and support asynchronous +promotion - we have 
added two periodic workers to handle eviction and promotion. + +The diagram below shows a simplified version of how the background evictor +thread (green) is integrated to the CacheLib architecture. + +

+ BackgroundEvictor +

+ +## Synchronous Eviction and Promotion + +- `disableEvictionToMemory`: Disables eviction to memory (item is always evicted to NVMe or removed +on eviction) + +## Background Evictors + +The background evictors scan each class to see if there are objects to move to the next (lower) +tier using a given strategy. Here we document the parameters for the different +strategies and general parameters. + +- `backgroundEvictorIntervalMilSec`: The interval that this thread runs for - by default +the background evictor threads will wake up every 10 ms to scan the AllocationClasses. Also, +the background evictor thread will be woken up every time an allocation fails (from +a request handling thread) or the current percentage of free memory for the +AllocationClass is at or below `lowEvictionAcWatermark`. This may render the interval parameter +not as important when there are many allocations occurring from request handling threads. + +- `evictorThreads`: The number of background evictors to run - each thread is assigned +a set of AllocationClasses to scan and evict objects from. Currently, each thread gets +an equal number of classes to scan - but as object size distribution may be unequal - future +versions will attempt to balance the classes among threads. The range is 1 to number of AllocationClasses. +The default is 1. + +- `maxEvictionBatch`: The number of objects to remove in a given eviction call. The +default is 40. Lower range is 10 and the upper range is 1000. Too low and we might not +remove objects at a reasonable rate, too high and it might increase contention with user threads. + +- `minEvictionBatch`: Minimum number of items to evict at any time (if there are any +candidates) + +- `maxEvictionPromotionHotness`: Maximum candidates to consider for eviction. This is similar to `maxEvictionBatch` +but it specifies how many candidates will be taken into consideration, not the actual number of items to evict. 
+This option can be used to configure duration of critical section on LRU lock. + + +### FreeThresholdStrategy (default) + +- `lowEvictionAcWatermark`: Triggers background eviction thread to run +when this percentage of the AllocationClass is free. +The default is `2.0`; to avoid wasting capacity we don't set this above `10.0`. + +- `highEvictionAcWatermark`: Stop the evictions from an AllocationClass when this +percentage of the AllocationClass is free. The default is `5.0`; to avoid wasting capacity we +don't set this above `10`. + + +## Background Promoters + +The background promoters scan each class to see if there are objects to move to a higher +tier using a given strategy. Here we document the parameters for the different +strategies and general parameters. + +- `backgroundPromoterIntervalMilSec`: The interval that this thread runs for - by default +the background promoter threads will wake up every 10 ms to scan the AllocationClasses for +objects to promote. + +- `promoterThreads`: The number of background promoters to run - each thread is assigned +a set of AllocationClasses to scan and promote objects from. Currently, each thread gets +an equal number of classes to scan - but as object size distribution may be unequal - future +versions will attempt to balance the classes among threads. The range is `1` to number of AllocationClasses. The default is `1`. + +- `maxPromotionBatch`: The number of objects to promote in a given promotion call. The +default is 40. Lower range is 10 and the upper range is 1000. Too low and we might not +promote objects at a reasonable rate, too high and it might increase contention with user threads. + +- `minPromotionBatch`: Minimum number of items to promote at any time (if there are any +candidates) + +- `numDuplicateElements`: This allows us to promote items that have existing handles (read-only) since +we won't need to modify the data when a user is done with the data. 
Therefore, for a short time +the data could reside in both tiers until it is evicted from its current tier. The default is to +not allow this (0). Setting the value to 100 will enable duplicate elements in tiers. + +### Background Promotion Strategy (only one currently) + +- `promotionAcWatermark`: Promote items if there is at least this +percent of free AllocationClasses. Promotion thread will attempt to move `maxPromotionBatch` number of objects +to that tier. The objects are chosen from the head of the LRU. The default is `4.0`. +This value should correlate with `lowEvictionAcWatermark`, `highEvictionAcWatermark`, `minAcAllocationWatermark`, `maxAcAllocationWatermark`. +- `maxPromotionBatch`: The number of objects to promote in batch during BG promotion. Analogous to +`maxEvictionBatch`. Its value should be lower to decrease contention on hot items. + +## Allocation policies + +- `maxAcAllocationWatermark`: Item is always allocated in topmost tier if at least this +percentage of the AllocationClass is free. +- `minAcAllocationWatermark`: Item is always allocated in bottom tier if only this percent +of the AllocationClass is free. If percentage of free AllocationClasses is between `maxAcAllocationWatermark` +and `minAcAllocationWatermark`, then extra checks (described below) are performed to decide where to put the element. + +By default, allocation will always be performed from the upper tier. + +- `acTopTierEvictionWatermark`: If there is less than this percent of free memory in topmost tier, cachelib will attempt to evict from top tier. This option takes precedence over allocationWatermarks. + +### Extra policies (used only when percentage of free AllocationClasses is between `maxAcAllocationWatermark` +and `minAcAllocationWatermark`) +- `sizeThresholdPolicy`: If item is smaller than this value, always allocate it in upper tier. 
+- `defaultTierChancePercentage`: Chance (0-100%) of allocating item in top tier + +## MMContainer options + +- `lruInsertionPointSpec`: Can be set per tier when LRU2Q is used. Determines where new items are +inserted. 0 = insert to hot queue, 1 = insert to warm queue, 2 = insert to cold queue +- `markUsefulChance`: Per-tier, determines chance of moving item to the head of LRU on access diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h new file mode 100644 index 0000000000..04adf4c92d --- /dev/null +++ b/cachelib/allocator/BackgroundMover-inl.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) Intel and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +namespace facebook { +namespace cachelib { + + +template +BackgroundMover::BackgroundMover(Cache& cache, + std::shared_ptr strategy, + MoverDir direction) + : cache_(cache), + strategy_(strategy), + direction_(direction) +{ + if (direction_ == MoverDir::Evict) { + moverFunc = + BackgroundMoverAPIWrapper::traverseAndEvictItems; + + } else if (direction_ == MoverDir::Promote) { + moverFunc = + BackgroundMoverAPIWrapper::traverseAndPromoteItems; + } +} + +template +BackgroundMover::~BackgroundMover() { stop(std::chrono::seconds(0)); } + +template +void BackgroundMover::work() { + try { + checkAndRun(); + } catch (const std::exception& ex) { + XLOGF(ERR, "BackgroundMover interrupted due to exception: {}", ex.what()); + } +} + +template +void BackgroundMover::setAssignedMemory(std::vector> &&assignedMemory) +{ + XLOG(INFO, "Class assigned to background worker:"); + for (auto [tid, pid, cid] : assignedMemory) { + XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid); + } + + mutex.lock_combine([this, &assignedMemory]{ + this->assignedMemory_ = std::move(assignedMemory); + }); +} + +// Look for classes that exceed the target memory capacity +// and return those for eviction +template +void BackgroundMover::checkAndRun() { + auto assignedMemory = mutex.lock_combine([this]{ + return assignedMemory_; + }); + + unsigned int moves = 0; + std::set classes{}; + auto batches = strategy_->calculateBatchSizes(cache_,assignedMemory); + + for (size_t i = 0; i < batches.size(); i++) { + const auto [tid, pid, cid] = assignedMemory[i]; + const auto batch = batches[i]; + + classes.insert(cid); + const auto& mpStats = cache_.getPoolByTid(pid,tid).getStats(); + + if (!batch) { + continue; + } + + //try moving BATCH items from the class in order to reach free target + auto moved = moverFunc(cache_,tid,pid,cid,batch); + // account only for the items actually moved, not the requested batch size + totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize); + moves += moved; + moves_per_class_[tid][pid][cid] += moved; + } + + numTraversals.inc(); + 
numMovedItems.add(moves); + totalClasses.add(classes.size()); +} + +template +BackgroundMoverStats BackgroundMover::getStats() const noexcept { + BackgroundMoverStats stats; + stats.numMovedItems = numMovedItems.get(); + stats.runCount = numTraversals.get(); + stats.totalBytesMoved = totalBytesMoved.get(); + stats.totalClasses = totalClasses.get(); + + return stats; +} + +template +std::map>> +BackgroundMover::getClassStats() const noexcept { + return moves_per_class_; +} + +} // namespace cachelib +} // namespace facebook diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h new file mode 100644 index 0000000000..45593b0f38 --- /dev/null +++ b/cachelib/allocator/BackgroundMover.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) Intel and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include "cachelib/allocator/CacheStats.h" +#include "cachelib/common/PeriodicWorker.h" +#include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/common/AtomicCounter.h" + + +namespace facebook { +namespace cachelib { + +// wrapper that exposes the private APIs of CacheType that are specifically +// needed for the cache api +template +struct BackgroundMoverAPIWrapper { + + static size_t traverseAndEvictItems(C& cache, + unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { + return cache.traverseAndEvictItems(tid,pid,cid,batch); + } + + static size_t traverseAndPromoteItems(C& cache, + unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { + return cache.traverseAndPromoteItems(tid,pid,cid,batch); + } + +}; + +enum class MoverDir { + Evict = 0, + Promote +}; + +// Periodic worker that evicts items from tiers in batches +// The primary aim is to reduce insertion times for new items in the +// cache +template +class BackgroundMover : public PeriodicWorker { + public: + using Cache = CacheT; + // @param cache the cache interface + // @param strategy the stragey class that defines how objects are moved, + // (promoted vs. 
evicted and how much) + BackgroundMover(Cache& cache, + std::shared_ptr strategy, + MoverDir direction_); + + ~BackgroundMover() override; + + BackgroundMoverStats getStats() const noexcept; + std::map>> getClassStats() const noexcept; + + void setAssignedMemory(std::vector> &&assignedMemory); + + private: + std::map>> moves_per_class_; + // cache allocator's interface for evicting + using Item = typename Cache::Item; + + Cache& cache_; + std::shared_ptr strategy_; + MoverDir direction_; + + std::function moverFunc; + + // implements the actual logic of running the background evictor + void work() override final; + void checkAndRun(); + + + AtomicCounter numMovedItems{0}; + AtomicCounter numTraversals{0}; + AtomicCounter totalClasses{0}; + AtomicCounter totalBytesMoved{0}; + + std::vector> assignedMemory_; + folly::DistributedMutex mutex; +}; +} // namespace cachelib +} // namespace facebook + +#include "cachelib/allocator/BackgroundMover-inl.h" diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h new file mode 100644 index 0000000000..9a37e8a411 --- /dev/null +++ b/cachelib/allocator/BackgroundMoverStrategy.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cachelib/allocator/Cache.h" + +namespace facebook { +namespace cachelib { + +// Base class for background eviction strategy. 
+class BackgroundMoverStrategy { + +public: + virtual std::vector calculateBatchSizes(const CacheBase& cache, + std::vector> acVec) = 0; +}; + +} // namespace cachelib +} // namespace facebook diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index d64fadc932..87643ff006 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -35,6 +35,7 @@ add_library (cachelib_allocator CCacheManager.cpp ContainerTypes.cpp FreeMemStrategy.cpp + FreeThresholdStrategy.cpp HitsPerSlabStrategy.cpp LruTailAgeStrategy.cpp MarginalHitsOptimizeStrategy.cpp diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index a7a97467ab..589614ee3b 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -96,6 +96,12 @@ class CacheBase { // // @param poolId The pool id to query virtual const MemoryPool& getPool(PoolId poolId) const = 0; + + // Get the reference to a memory pool using a tier id, for stats purposes + // + // @param poolId The pool id to query + // @param tierId The tier of the pool id + virtual const MemoryPool& getPoolByTid(PoolId poolId, TierId tid) const = 0; // Get Pool specific stats (regular pools). This includes stats from the // Memory Pool and also the cache. diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 1b494d15bb..1c8882ffef 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -383,6 +383,22 @@ CacheAllocator::allocate(PoolId poolId, ttlSecs == 0 ? 0 : creationTime + ttlSecs); } +template +bool CacheAllocator::shouldWakeupBgEvictor(TierId tid, PoolId pid, ClassId cid) { + // TODO: should we also work on lower tiers? should we have separate set of params? 
+ if (tid == 1) return false; + return getAllocationClassStats(tid, pid, cid).approxFreePercent <= config_.lowEvictionAcWatermark; +} + +template +size_t CacheAllocator::backgroundWorkerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers) { + XDCHECK(numWorkers); + + // TODO: came up with some better sharding (use some hashing) + return (tid + pid + cid) % numWorkers; +} + + template typename CacheAllocator::WriteHandle CacheAllocator::allocateInternalTier(TierId tid, @@ -390,7 +406,8 @@ CacheAllocator::allocateInternalTier(TierId tid, typename Item::Key key, uint32_t size, uint32_t creationTime, - uint32_t expiryTime) { + uint32_t expiryTime, + bool fromBgThread) { util::LatencyTracker tracker{stats().allocateLatency_}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; @@ -404,8 +421,12 @@ CacheAllocator::allocateInternalTier(TierId tid, // TODO: per-tier (*stats_.allocAttempts)[pid][cid].inc(); - + void* memory = allocator_[tid]->allocate(pid, requiredSize); + + if (backgroundEvictor_.size() && !fromBgThread && (memory == nullptr || shouldWakeupBgEvictor(tid, pid, cid))) { + backgroundEvictor_[backgroundWorkerId(tid, pid, cid, backgroundEvictor_.size())]->wakeUp(); + } // TODO: Today disableEviction means do not evict from memory (DRAM). // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)? 
if (memory == nullptr && !config_.isEvictionDisabled()) { @@ -454,10 +475,11 @@ CacheAllocator::allocateInternal(PoolId pid, typename Item::Key key, uint32_t size, uint32_t creationTime, - uint32_t expiryTime) { + uint32_t expiryTime, + bool fromBgThread) { auto tid = 0; /* TODO: consult admission policy */ for(TierId tid = 0; tid < getNumTiers(); ++tid) { - auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime); + auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime, fromBgThread); if (handle) return handle; } return {}; @@ -1639,7 +1661,7 @@ bool CacheAllocator::shouldWriteToNvmCacheExclusive( template typename CacheAllocator::WriteHandle CacheAllocator::tryEvictToNextMemoryTier( - TierId tid, PoolId pid, Item& item) { + TierId tid, PoolId pid, Item& item, bool fromBgThread) { if(item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet if(item.isExpired()) return acquire(&item); @@ -1650,7 +1672,8 @@ CacheAllocator::tryEvictToNextMemoryTier( item.getKey(), item.getSize(), item.getCreationTime(), - item.getExpiryTime()); + item.getExpiryTime(), + fromBgThread); if (newItemHdl) { XDCHECK_EQ(newItemHdl->getSize(), item.getSize()); @@ -1663,12 +1686,52 @@ CacheAllocator::tryEvictToNextMemoryTier( template typename CacheAllocator::WriteHandle -CacheAllocator::tryEvictToNextMemoryTier(Item& item) { +CacheAllocator::tryEvictToNextMemoryTier(Item& item, bool fromBgThread) { auto tid = getTierId(item); auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId; - return tryEvictToNextMemoryTier(tid, pid, item); + return tryEvictToNextMemoryTier(tid, pid, item, fromBgThread); } +template +bool +CacheAllocator::tryPromoteToNextMemoryTier( + TierId tid, PoolId pid, Item& item, bool fromBgThread) { + TierId nextTier = tid; + while (nextTier > 0) { // try to evict down to the next memory tiers + auto toPromoteTier = nextTier - 1; + --nextTier; + + // allocateInternal might trigger another eviction 
+ auto newItemHdl = allocateInternalTier(toPromoteTier, pid, + item.getKey(), + item.getSize(), + item.getCreationTime(), + item.getExpiryTime(), + fromBgThread); + + if (newItemHdl) { + XDCHECK_EQ(newItemHdl->getSize(), item.getSize()); + auto predicate = [&](const Item& item){ + return item.getRefCount() == 0 || config_.numDuplicateElements > 0; + }; + if (moveRegularItemWithSync(item, newItemHdl, predicate)) { + return true; + } + } + } + + return false; +} + +template +bool +CacheAllocator::tryPromoteToNextMemoryTier(Item& item, bool fromBgThread) { + auto tid = getTierId(item); + auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId; + return tryPromoteToNextMemoryTier(tid, pid, item, fromBgThread); +} + + template typename CacheAllocator::RemoveRes CacheAllocator::remove(typename Item::Key key) { @@ -2910,7 +2973,8 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { oldItem.getKey(), oldItem.getSize(), oldItem.getCreationTime(), - oldItem.getExpiryTime()); + oldItem.getExpiryTime(), + false); if (!newItemHdl) { return {}; } @@ -3043,14 +3107,15 @@ void CacheAllocator::evictForSlabRelease( template typename CacheAllocator::WriteHandle CacheAllocator::evictNormalItem(Item& item, - bool skipIfTokenInvalid) { + bool skipIfTokenInvalid, + bool fromBgThread) { XDCHECK(item.isMoving()); if (item.isOnlyMoving()) { return WriteHandle{}; } - auto evictHandle = tryEvictToNextMemoryTier(item); + auto evictHandle = tryEvictToNextMemoryTier(item, fromBgThread); if(evictHandle) return evictHandle; auto predicate = [](const Item& it) { return it.getRefCount() == 0; }; @@ -3698,6 +3763,8 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { ret.nvmUpTime = currTime - nvmCacheState_.getCreationTime(); ret.nvmCacheEnabled = nvmCache_ ? 
nvmCache_->isEnabled() : false; ret.reaperStats = getReaperStats(); + ret.evictionStats = getBackgroundMoverStats(MoverDir::Evict); + ret.promotionStats = getBackgroundMoverStats(MoverDir::Promote); ret.numActiveHandles = getNumActiveHandles(); ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_; @@ -3847,6 +3914,64 @@ bool CacheAllocator::startNewReaper( return startNewWorker("Reaper", reaper_, interval, reaperThrottleConfig); } +template +auto CacheAllocator::getAssignedMemoryToBgWorker(size_t evictorId, size_t numWorkers, TierId tid) +{ + std::vector> asssignedMemory; + // TODO: for now, only evict from tier 0 + auto pools = filterCompactCachePools(allocator_[tid]->getPoolIds()); + for (const auto pid : pools) { + const auto& mpStats = getPoolByTid(pid,tid).getStats(); + for (const auto cid : mpStats.classIds) { + if (backgroundWorkerId(tid, pid, cid, numWorkers) == evictorId) { + asssignedMemory.emplace_back(tid, pid, cid); + } + } + } + return asssignedMemory; +} + +template +bool CacheAllocator::startNewBackgroundEvictor( + std::chrono::milliseconds interval, + std::shared_ptr strategy, + size_t threads) { + XDCHECK(threads > 0); + backgroundEvictor_.resize(threads); + bool result = true; + + for (size_t i = 0; i < threads; i++) { + auto ret = startNewWorker("BackgroundEvictor" + std::to_string(i), backgroundEvictor_[i], interval, strategy, MoverDir::Evict); + result = result && ret; + + if (result) { + backgroundEvictor_[i]->setAssignedMemory(getAssignedMemoryToBgWorker(i, backgroundEvictor_.size(), 0)); + } + } + return result; +} + +template +bool CacheAllocator::startNewBackgroundPromoter( + std::chrono::milliseconds interval, + std::shared_ptr strategy, + size_t threads) { + XDCHECK(threads > 0); + XDCHECK(getNumTiers() > 1); + backgroundPromoter_.resize(threads); + bool result = true; + + for (size_t i = 0; i < threads; i++) { + auto ret = startNewWorker("BackgroundPromoter" + std::to_string(i), backgroundPromoter_[i], interval, 
strategy, MoverDir::Promote); + result = result && ret; + + if (result) { + backgroundPromoter_[i]->setAssignedMemory(getAssignedMemoryToBgWorker(i, backgroundPromoter_.size(), 1)); + } + } + return result; +} + template bool CacheAllocator::stopPoolRebalancer( std::chrono::seconds timeout) { diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 02557dfe24..a65f14d42c 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -39,6 +39,7 @@ #include #pragma GCC diagnostic pop +#include "cachelib/allocator/BackgroundMover.h" #include "cachelib/allocator/CCacheManager.h" #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/CacheAllocatorConfig.h" @@ -659,6 +660,11 @@ class CacheAllocator : public CacheBase { // @return the full usable size for this item uint32_t getUsableSize(const Item& item) const; + // gets the allocation class assigned to BG worker + auto getAssignedMemoryToBgWorker(size_t evictorId, size_t numWorkers, TierId tid); + bool shouldWakeupBgEvictor(TierId tid, PoolId pid, ClassId cid); + size_t backgroundWorkerId(TierId tid, PoolId pid, ClassId cid, size_t numWorkers); + // Get a random item from memory // This is useful for profiling and sampling cachelib managed memory // @@ -1004,6 +1010,11 @@ class CacheAllocator : public CacheBase { // @param reaperThrottleConfig throttling config bool startNewReaper(std::chrono::milliseconds interval, util::Throttler::Config reaperThrottleConfig); + + bool startNewBackgroundPromoter(std::chrono::milliseconds interval, + std::shared_ptr strategy, size_t threads); + bool startNewBackgroundEvictor(std::chrono::milliseconds interval, + std::shared_ptr strategy, size_t threads); // Stop existing workers with a timeout bool stopPoolRebalancer(std::chrono::seconds timeout = std::chrono::seconds{ @@ -1048,6 +1059,10 @@ class CacheAllocator : public CacheBase { return allocator_[currentTier()]->getPool(pid); } + const MemoryPool& 
getPoolByTid(PoolId pid, TierId tid) const override final { + return allocator_[tid]->getPool(pid); + } + // calculate the number of slabs to be advised/reclaimed in each pool PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final { auto regularPoolIds = getRegularPoolIds(); @@ -1098,6 +1113,52 @@ class CacheAllocator : public CacheBase { auto stats = reaper_ ? reaper_->getStats() : ReaperStats{}; return stats; } + + // returns the background mover stats + BackgroundMoverStats getBackgroundMoverStats(MoverDir direction) const { + + auto stats = BackgroundMoverStats{}; + if (direction == MoverDir::Evict) { + for (auto &bg : backgroundEvictor_) + stats += bg->getStats(); + } else if (direction == MoverDir::Promote) { + for (auto &bg : backgroundPromoter_) + stats += bg->getStats(); + } + return stats; + + } + + + std::map>> + getBackgroundMoverClassStats(MoverDir direction) const { + std::map>> stats; + + if (direction == MoverDir::Evict) { + for (auto &bg : backgroundEvictor_) { + for (auto &tid : bg->getClassStats()) { + for (auto &pid : tid.second) { + for (auto &cid : pid.second) { + stats[tid.first][pid.first][cid.first] += cid.second; + } + } + } + } + } else if (direction == MoverDir::Promote) { + for (auto &bg : backgroundPromoter_) { + for (auto &tid : bg->getClassStats()) { + for (auto &pid : tid.second) { + for (auto &cid : pid.second) { + stats[tid.first][pid.first][cid.first] += cid.second; + } + } + } + } + } + + return stats; + } + // return the LruType of an item typename MMType::LruType getItemLruType(const Item& item) const; @@ -1393,7 +1454,8 @@ class CacheAllocator : public CacheBase { Key key, uint32_t size, uint32_t creationTime, - uint32_t expiryTime); + uint32_t expiryTime, + bool fromBgThread = false); // create a new cache allocation on specific memory tier. // For description see allocateInternal. 
@@ -1404,7 +1466,8 @@ class CacheAllocator : public CacheBase { Key key, uint32_t size, uint32_t creationTime, - uint32_t expiryTime); + uint32_t expiryTime, + bool fromBgThread); // Allocate a chained item // @@ -1668,7 +1731,11 @@ class CacheAllocator : public CacheBase { // // @return valid handle to the item. This will be the last // handle to the item. On failure an empty handle. - WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item); + WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item, bool fromBgThread); + + bool tryPromoteToNextMemoryTier(TierId tid, PoolId pid, Item& item, bool fromBgThread); + + bool tryPromoteToNextMemoryTier(Item& item, bool fromBgThread); // Try to move the item down to the next memory tier // @@ -1676,7 +1743,7 @@ class CacheAllocator : public CacheBase { // // @return valid handle to the item. This will be the last // handle to the item. On failure an empty handle. - WriteHandle tryEvictToNextMemoryTier(Item& item); + WriteHandle tryEvictToNextMemoryTier(Item& item, bool fromBgThread); size_t memoryTierSize(TierId tid) const; @@ -1797,7 +1864,7 @@ class CacheAllocator : public CacheBase { // // @return last handle for corresponding to item on success. empty handle on // failure. caller can retry if needed. - WriteHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false); + WriteHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false, bool fromBgThread = false); // Helper function to evict a child item for slab release // As a side effect, the parent item is also evicted @@ -1827,6 +1894,130 @@ class CacheAllocator : public CacheBase { stats().numSkippedSlabReleases.add(slabsSkipped); } + // exposed for the background evictor to iterate through the memory and evict + // in batch. 
This should improve insertion path for tiered memory config + size_t traverseAndEvictItems(unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { + auto& mmContainer = getMMContainer(tid, pid, cid); + size_t evictions = 0; + size_t evictionCandidates = 0; + std::vector candidates; + candidates.reserve(batch); + + size_t tries = 0; + mmContainer.withEvictionIterator([&tries, &candidates, &batch, this](auto &&itr){ + while (candidates.size() < batch && (config_.maxEvictionPromotionHotness == 0 || tries < config_.maxEvictionPromotionHotness) && itr) { + tries++; + Item* candidate = itr.get(); + XDCHECK(candidate); + + if (candidate->isChainedItem()) { + throw std::runtime_error("Not supported for chained items"); + } + + if (candidate->getRefCount() == 0 && candidate->markMoving()) { + candidates.push_back(candidate); + } + + ++itr; + } + }); + + for (Item *candidate : candidates) { + auto toReleaseHandle = + evictNormalItem(*candidate, true /* skipIfTokenInvalid */, true /* from BG thread */); + auto ref = candidate->unmarkMoving(); + + if (toReleaseHandle || ref == 0u) { + if (candidate->hasChainedItem()) { + (*stats_.chainedItemEvictions)[pid][cid].inc(); + } else { + (*stats_.regularItemEvictions)[pid][cid].inc(); + } + + evictions++; + } else { + if (candidate->hasChainedItem()) { + stats_.evictFailParentAC.inc(); + } else { + stats_.evictFailAC.inc(); + } + } + + if (toReleaseHandle) { + XDCHECK(toReleaseHandle.get() == candidate); + XDCHECK_EQ(1u, toReleaseHandle->getRefCount()); + + // We manually release the item here because we don't want to + // invoke the Item Handle's destructor which will be decrementing + // an already zero refcount, which will throw exception + auto& itemToRelease = *toReleaseHandle.release(); + + // Decrementing the refcount because we want to recycle the item + const auto ref = decRef(itemToRelease); + XDCHECK_EQ(0u, ref); + + auto res = releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ 
false); + XDCHECK(res == ReleaseRes::kReleased); + } else if (ref == 0u) { + // it's safe to recycle the item here as there are no more + // references and the item could not been marked as moving + // by other thread since it's detached from MMContainer. + auto res = releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ false); + XDCHECK(res == ReleaseRes::kReleased); + } + } + + return evictions; + } + + size_t traverseAndPromoteItems(unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { + auto& mmContainer = getMMContainer(tid, pid, cid); + size_t promotions = 0; + std::vector candidates; + candidates.reserve(batch); + + size_t tries = 0; + + mmContainer.withPromotionIterator([&tries, &candidates, &batch, this](auto &&itr){ + while (candidates.size() < batch && (config_.maxEvictionPromotionHotness == 0 || tries < config_.maxEvictionPromotionHotness) && itr) { + tries++; + Item* candidate = itr.get(); + XDCHECK(candidate); + + if (candidate->isChainedItem()) { + throw std::runtime_error("Not supported for chained items"); + } + + + // TODO: only allow it for read-only items? + // or implement mvcc + if (!candidate->isExpired() && candidate->markMoving()) { + candidates.push_back(candidate); + } + + ++itr; + } + }); + + for (Item *candidate : candidates) { + auto promoted = tryPromoteToNextMemoryTier(*candidate, true); + auto ref = candidate->unmarkMoving(); + if (promoted) + promotions++; + + if (ref == 0u) { + // stats_.promotionMoveSuccess.inc(); + auto res = releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ false); + XDCHECK(res == ReleaseRes::kReleased); + } + } + + return promotions; + } + // returns true if nvmcache is enabled and we should write this item to // nvmcache. 
bool shouldWriteToNvmCache(const Item& item); @@ -2153,6 +2344,10 @@ class CacheAllocator : public CacheBase { // free memory monitor std::unique_ptr memMonitor_; + + // background evictor + std::vector>> backgroundEvictor_; + std::vector>> backgroundPromoter_; // check whether a pool is a slabs pool std::array isCompactCachePool_{}; @@ -2214,6 +2409,7 @@ class CacheAllocator : public CacheBase { // Make this friend to give access to acquire and release friend ReadHandle; friend ReaperAPIWrapper; + friend BackgroundMoverAPIWrapper; friend class CacheAPIWrapperForNvm; friend class FbInternalRuntimeUpdateWrapper; friend class objcache2::ObjectCache; diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index c0a70139ce..3dedf6337c 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -31,6 +31,7 @@ #include "cachelib/allocator/MemoryTierCacheConfig.h" #include "cachelib/allocator/NvmAdmissionPolicy.h" #include "cachelib/allocator/PoolOptimizeStrategy.h" +#include "cachelib/allocator/BackgroundMoverStrategy.h" #include "cachelib/allocator/RebalanceStrategy.h" #include "cachelib/allocator/Util.h" #include "cachelib/common/EventInterface.h" @@ -267,6 +268,16 @@ class CacheAllocatorConfig { std::chrono::seconds regularInterval, std::chrono::seconds ccacheInterval, uint32_t ccacheStepSizePercent); + + // Enable the background evictor - scans a tier to look for objects + // to evict to the next tier + CacheAllocatorConfig& enableBackgroundEvictor( + std::shared_ptr backgroundMoverStrategy, + std::chrono::milliseconds regularInterval, size_t threads); + + CacheAllocatorConfig& enableBackgroundPromoter( + std::shared_ptr backgroundMoverStrategy, + std::chrono::milliseconds regularInterval, size_t threads); // This enables an optimization for Pool rebalancing and resizing. 
// The rough idea is to ensure only the least useful items are evicted when @@ -464,6 +475,16 @@ class CacheAllocatorConfig { // The slab release process is considered as being stuck if it does not // make any progress for the below threshold std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)}; + + // rebalance to avoid alloc fialures. + std::shared_ptr backgroundEvictorStrategy; + std::shared_ptr backgroundPromoterStrategy; + // time interval to sleep between runs of the background evictor + std::chrono::milliseconds backgroundEvictorInterval{std::chrono::milliseconds{1000}}; + std::chrono::milliseconds backgroundPromoterInterval{std::chrono::milliseconds{1000}}; + + size_t backgroundEvictorThreads{1}; + size_t backgroundPromoterThreads{1}; // time interval to sleep between iterations of pool size optimization, // for regular pools and compact caches @@ -603,6 +624,28 @@ class CacheAllocatorConfig { // If true, we will delay worker start until user explicitly calls // CacheAllocator::startCacheWorkers() bool delayCacheWorkersStart{false}; + + + double minAcAllocationWatermark{0.0}; + double maxAcAllocationWatermark{0.0}; + + double promotionAcWatermark{4.0}; + double lowEvictionAcWatermark{2.0}; + double highEvictionAcWatermark{5.0}; + double numDuplicateElements{0.0}; // inclusivness of the cache + double syncPromotion{0.0}; // can promotion be done synchronously in user thread + + uint64_t evictorThreads{1}; + uint64_t promoterThreads{1}; + + uint64_t maxEvictionBatch{40}; + uint64_t maxPromotionBatch{10}; + + uint64_t minEvictionBatch{1}; + uint64_t minPromotionBatch{1}; + + uint64_t maxEvictionPromotionHotness{60}; + friend CacheT; @@ -951,6 +994,26 @@ CacheAllocatorConfig& CacheAllocatorConfig::enablePoolRebalancing( return *this; } +template +CacheAllocatorConfig& CacheAllocatorConfig::enableBackgroundEvictor( + std::shared_ptr strategy, + std::chrono::milliseconds interval, size_t evictorThreads) { + backgroundEvictorStrategy = 
// Mover Stats
//
// Counters reported by a background mover. The same struct is used for both
// the eviction and the promotion direction.
struct BackgroundMoverStats {
  // the number of items this worker moved by looking at pools/classes stats
  uint64_t numMovedItems{0};
  // number of times the worker thread was executed
  // TODO: is this def correct?
  uint64_t runCount{0};
  // total number of classes
  uint64_t totalClasses{0};
  // total number of bytes moved
  uint64_t totalBytesMoved{0};

  // Adds every counter of `rhs` onto this object and returns *this so that
  // per-worker stats can be accumulated into one total.
  BackgroundMoverStats& operator+=(const BackgroundMoverStats& rhs) noexcept {
    numMovedItems += rhs.numMovedItems;
    runCount += rhs.runCount;
    totalClasses += rhs.totalClasses;
    totalBytesMoved += rhs.totalBytesMoved;
    return *this;
  }
};
+ */ + +#include "cachelib/allocator/FreeThresholdStrategy.h" + +#include + +namespace facebook { +namespace cachelib { + + + +FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, double highEvictionAcWatermark, uint64_t maxEvictionBatch, uint64_t minEvictionBatch) + : lowEvictionAcWatermark(lowEvictionAcWatermark), highEvictionAcWatermark(highEvictionAcWatermark), maxEvictionBatch(maxEvictionBatch), minEvictionBatch(minEvictionBatch) {} + +std::vector FreeThresholdStrategy::calculateBatchSizes( + const CacheBase& cache, std::vector> acVec) { + std::vector batches{}; + for (auto [tid, pid, cid] : acVec) { + auto stats = cache.getAllocationClassStats(tid, pid, cid); + if (stats.approxFreePercent >= highEvictionAcWatermark) { + batches.push_back(0); + } else { + auto toFreeMemPercent = highEvictionAcWatermark - stats.approxFreePercent; + auto toFreeItems = static_cast(toFreeMemPercent * stats.memorySize / stats.allocSize); + batches.push_back(toFreeItems); + } + } + + if (batches.size() == 0) { + return batches; + } + + auto maxBatch = *std::max_element(batches.begin(), batches.end()); + if (maxBatch == 0) + return batches; + + std::transform(batches.begin(), batches.end(), batches.begin(), [&](auto numItems){ + if (numItems == 0) { + return 0UL; + } + + auto cappedBatchSize = maxEvictionBatch * numItems / maxBatch; + if (cappedBatchSize < minEvictionBatch) + return minEvictionBatch; + else + return cappedBatchSize; + }); + + return batches; +} + +} // namespace cachelib +} // namespace facebook diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h new file mode 100644 index 0000000000..babd8935cd --- /dev/null +++ b/cachelib/allocator/FreeThresholdStrategy.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/BackgroundMoverStrategy.h" + +namespace facebook { +namespace cachelib { + + +// Base class for background mover strategy. +class FreeThresholdStrategy : public BackgroundMoverStrategy { + +public: + FreeThresholdStrategy(double lowEvictionAcWatermark, double highEvictionAcWatermark, uint64_t maxEvictionBatch, uint64_t minEvictionBatch); + ~FreeThresholdStrategy() {} + + std::vector calculateBatchSizes(const CacheBase& cache, + std::vector> acVecs); +private: + double lowEvictionAcWatermark{2.0}; + double highEvictionAcWatermark{5.0}; + uint64_t maxEvictionBatch{40}; + uint64_t minEvictionBatch{5}; +}; + +} // namespace cachelib +} // namespace facebook diff --git a/cachelib/allocator/MMLru-inl.h b/cachelib/allocator/MMLru-inl.h index 25751f188b..ab35030d01 100644 --- a/cachelib/allocator/MMLru-inl.h +++ b/cachelib/allocator/MMLru-inl.h @@ -227,6 +227,15 @@ MMLru::Container::withEvictionIterator(F&& fun) { }); } +template T::*HookPtr> +template +void +MMLru::Container::withPromotionIterator(F&& fun) { + lruMutex_->lock_combine([this, &fun]() { + fun(Iterator{LockHolder{}, lru_.begin()}); + }); +} + template T::*HookPtr> void MMLru::Container::ensureNotInsertionPoint(T& node) noexcept { // If we are removing the insertion point node, grow tail before we remove diff --git a/cachelib/allocator/MMLru.h b/cachelib/allocator/MMLru.h index 0ba27db3a4..fed847f1ae 100644 --- a/cachelib/allocator/MMLru.h +++ b/cachelib/allocator/MMLru.h @@ -337,6 +337,9 @@ class MMLru 
{ template void withEvictionIterator(F&& f); + template + void withPromotionIterator(F&& f); + // get copy of current config Config getConfig() const; diff --git a/cachelib/allocator/MMTinyLFU-inl.h b/cachelib/allocator/MMTinyLFU-inl.h index f4420177e1..09f4ba6dba 100644 --- a/cachelib/allocator/MMTinyLFU-inl.h +++ b/cachelib/allocator/MMTinyLFU-inl.h @@ -228,6 +228,14 @@ MMTinyLFU::Container::withEvictionIterator(F&& fun) { fun(Iterator{LockHolder{}, *this}); } +template T::*HookPtr> +template +void +MMTinyLFU::Container::withPromotionIterator(F&& fun) { + throw std::runtime_error("Not supported"); +} + + template T::*HookPtr> void MMTinyLFU::Container::removeLocked(T& node) noexcept { diff --git a/cachelib/allocator/MMTinyLFU.h b/cachelib/allocator/MMTinyLFU.h index 40886d53af..0f0a245064 100644 --- a/cachelib/allocator/MMTinyLFU.h +++ b/cachelib/allocator/MMTinyLFU.h @@ -495,6 +495,9 @@ class MMTinyLFU { // iterator passed as parameter. template void withEvictionIterator(F&& f); + + template + void withPromotionIterator(F&& f); // for saving the state of the lru // diff --git a/cachelib/allocator/PromotionStrategy.h b/cachelib/allocator/PromotionStrategy.h new file mode 100644 index 0000000000..ad9145282b --- /dev/null +++ b/cachelib/allocator/PromotionStrategy.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/BackgroundMoverStrategy.h" + +namespace facebook { +namespace cachelib { + + +// Base class for background eviction strategy. +class PromotionStrategy : public BackgroundMoverStrategy { + +public: + PromotionStrategy(uint64_t promotionAcWatermark, uint64_t maxPromotionBatch, uint64_t minPromotionBatch): + promotionAcWatermark(promotionAcWatermark), maxPromotionBatch(maxPromotionBatch), minPromotionBatch(minPromotionBatch) + { + + } + ~PromotionStrategy() {} + + std::vector calculateBatchSizes(const CacheBase& cache, + std::vector> acVec) { + std::vector batches{}; + for (auto [tid, pid, cid] : acVec) { + XDCHECK(tid > 0); + auto stats = cache.getAllocationClassStats(tid - 1, pid, cid); + if (stats.approxFreePercent < promotionAcWatermark) + batches.push_back(0); + else { + auto maxPossibleItemsToPromote = static_cast((promotionAcWatermark - stats.approxFreePercent) * + stats.memorySize / stats.allocSize); + batches.push_back(maxPossibleItemsToPromote); + } + } + + if (batches.size() == 0) { + return batches; + } + + auto maxBatch = *std::max_element(batches.begin(), batches.end()); + if (maxBatch == 0) + return batches; + + std::transform(batches.begin(), batches.end(), batches.begin(), [&](auto numItems){ + if (numItems == 0) { + return 0UL; + } + + auto cappedBatchSize = maxPromotionBatch * numItems / maxBatch; + if (cappedBatchSize < minPromotionBatch) + return minPromotionBatch; + else + return cappedBatchSize; + }); + + return batches; + } +private: + double promotionAcWatermark{4.0}; + uint64_t maxPromotionBatch{40}; + uint64_t minPromotionBatch{5}; +}; + +} // namespace cachelib +} // namespace facebook diff --git a/cachelib/allocator/tests/CacheBaseTest.cpp b/cachelib/allocator/tests/CacheBaseTest.cpp index 89721f3589..420e77885a 100644 --- a/cachelib/allocator/tests/CacheBaseTest.cpp +++ b/cachelib/allocator/tests/CacheBaseTest.cpp @@ -33,6 +33,8 @@ class 
CacheBaseTest : public CacheBase, public SlabAllocatorTestBase { const std::string getCacheName() const override { return cacheName; } bool isObjectCache() const override { return false; } const MemoryPool& getPool(PoolId) const override { return memoryPool_; } + //TODO: support tiers + const MemoryPool& getPoolByTid(PoolId, TierId tid) const override { return memoryPool_; } PoolStats getPoolStats(PoolId) const override { return PoolStats(); } AllocationClassBaseStat getAllocationClassStats(TierId tid, PoolId, diff --git a/cachelib/cachebench/cache/Cache-inl.h b/cachelib/cachebench/cache/Cache-inl.h index 383355c184..a236fe0f75 100644 --- a/cachelib/cachebench/cache/Cache-inl.h +++ b/cachelib/cachebench/cache/Cache-inl.h @@ -46,6 +46,16 @@ Cache::Cache(const CacheConfig& config, config_.getRebalanceStrategy(), std::chrono::seconds(config_.poolRebalanceIntervalSec)); + allocatorConfig_.enableBackgroundEvictor( + config_.getBackgroundEvictorStrategy(), + std::chrono::milliseconds(config_.backgroundEvictorIntervalMilSec), + config_.evictorThreads); + + allocatorConfig_.enableBackgroundPromoter( + config_.getBackgroundPromoterStrategy(), + std::chrono::milliseconds(config_.backgroundPromoterIntervalMilSec), + config_.promoterThreads); + if (config_.moveOnSlabRelease && movingSync != nullptr) { allocatorConfig_.enableMovingOnSlabRelease( [](Item& oldItem, Item& newItem, Item* parentPtr) { @@ -100,6 +110,12 @@ Cache::Cache(const CacheConfig& config, } }); + allocatorConfig_.maxEvictionBatch = config_.maxEvictionBatch; + allocatorConfig_.maxPromotionBatch = config_.maxPromotionBatch; + allocatorConfig_.minEvictionBatch = config_.minEvictionBatch; + allocatorConfig_.minPromotionBatch = config_.minPromotionBatch; + allocatorConfig_.maxEvictionPromotionHotness = config_.maxEvictionPromotionHotness; + if (config_.enableItemDestructorCheck) { auto removeCB = [&](const typename Allocator::DestructorData& data) { if (!itemRecords_.validate(data)) { @@ -611,6 +627,21 @@ Stats 
// Background eviction counters as reported by cachebench.
struct BackgroundEvictionStats {
  // the number of items this worker evicted by looking at pools/classes stats
  uint64_t nEvictedItems{0};

  // number of times the worker thread was executed
  // TODO: is this def correct?
  uint64_t nTraversals{0};

  // number of classes
  uint64_t nClasses{0};

  // size of evicted items
  uint64_t evictionSize{0};
};

// Background promotion counters as reported by cachebench.
struct BackgroundPromotionStats {
  // the number of items this worker promoted by looking at pools/classes
  // stats
  uint64_t nPromotedItems{0};

  // number of times the worker thread was executed
  // TODO: is this def correct?
  uint64_t nTraversals{0};
};
evicted){ + out << folly::sformat("tid{:2} pid{:2} cid{:4} evicted: {:4}", + tid, pid, cid, evicted) << std::endl; + }); + } + + if (!backgroundPromotionClasses.empty() && backgndPromoStats.nPromotedItems > 0) { + out << "== Class Background Promotion Counters Map ==" << std::endl; + foreachAC(backgroundPromotionClasses, [&](auto tid, auto pid, auto cid, auto promoted){ + out << folly::sformat("tid{:2} pid{:2} cid{:4} promoted: {:4}", + tid, pid, cid, promoted) << std::endl; + }); + } + if (numNvmGets > 0 || numNvmDeletes > 0 || numNvmPuts > 0) { const double ramHitRatio = invertPctFn(numCacheGetMiss, numCacheGets); const double nvmHitRatio = invertPctFn(numNvmGetMiss, numNvmGets); diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp index 29cd9cb6a3..8b3ef99d4d 100644 --- a/cachelib/cachebench/util/CacheConfig.cpp +++ b/cachelib/cachebench/util/CacheConfig.cpp @@ -19,6 +19,8 @@ #include "cachelib/allocator/HitsPerSlabStrategy.h" #include "cachelib/allocator/LruTailAgeStrategy.h" #include "cachelib/allocator/RandomStrategy.h" +#include "cachelib/allocator/FreeThresholdStrategy.h" +#include "cachelib/allocator/PromotionStrategy.h" namespace facebook { namespace cachelib { @@ -28,6 +30,9 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) { JSONSetVal(configJson, cacheDir); JSONSetVal(configJson, cacheSizeMB); JSONSetVal(configJson, poolRebalanceIntervalSec); + JSONSetVal(configJson, backgroundEvictorIntervalMilSec); + JSONSetVal(configJson, backgroundPromoterIntervalMilSec); + JSONSetVal(configJson, backgroundEvictorStrategy); JSONSetVal(configJson, moveOnSlabRelease); JSONSetVal(configJson, rebalanceStrategy); JSONSetVal(configJson, rebalanceMinSlabs); @@ -92,18 +97,30 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) { JSONSetVal(configJson, nvmAdmissionRetentionTimeThreshold); JSONSetVal(configJson, customConfigJson); - + + + JSONSetVal(configJson, lowEvictionAcWatermark); + JSONSetVal(configJson, 
highEvictionAcWatermark); + JSONSetVal(configJson, minAcAllocationWatermark); + JSONSetVal(configJson, maxAcAllocationWatermark); + JSONSetVal(configJson, numDuplicateElements); + JSONSetVal(configJson, syncPromotion); + JSONSetVal(configJson, evictorThreads); + JSONSetVal(configJson, promoterThreads); + + JSONSetVal(configJson, promotionAcWatermark); JSONSetVal(configJson, persistedCacheDir); JSONSetVal(configJson, usePosixShm); - if (configJson.count("memoryTiers")) { - for (auto& it : configJson["memoryTiers"]) { - memoryTierConfigs.push_back(MemoryTierConfig(it).getMemoryTierCacheConfig()); - } - } + JSONSetVal(configJson, maxEvictionBatch); + JSONSetVal(configJson, maxPromotionBatch); + JSONSetVal(configJson, minEvictionBatch); + JSONSetVal(configJson, minPromotionBatch); + JSONSetVal(configJson, maxEvictionPromotionHotness); + // if you added new fields to the configuration, update the JSONSetVal // to make them available for the json configs and increment the size // below - checkCorrectSize(); + checkCorrectSize(); if (numPools != poolSizes.size()) { throw std::invalid_argument(folly::sformat( @@ -138,10 +155,23 @@ MemoryTierConfig::MemoryTierConfig(const folly::dynamic& configJson) { JSONSetVal(configJson, file); JSONSetVal(configJson, ratio); JSONSetVal(configJson, memBindNodes); - checkCorrectSize(); } +std::shared_ptr CacheConfig::getBackgroundEvictorStrategy() const { + if (backgroundEvictorIntervalMilSec == 0) { + return nullptr; + } + return std::make_shared(lowEvictionAcWatermark, highEvictionAcWatermark, maxEvictionBatch, minEvictionBatch); +} + +std::shared_ptr CacheConfig::getBackgroundPromoterStrategy() const { + if (backgroundPromoterIntervalMilSec == 0) { + return nullptr; + } + return std::make_shared(promotionAcWatermark, maxPromotionBatch, minPromotionBatch); +} + static bool starts_with() {return true;} std::vector MemoryTierConfig::parseNumaNodes() { diff --git a/cachelib/cachebench/util/CacheConfig.h 
b/cachelib/cachebench/util/CacheConfig.h index 7a8c9020b0..c1b18df670 100644 --- a/cachelib/cachebench/util/CacheConfig.h +++ b/cachelib/cachebench/util/CacheConfig.h @@ -20,6 +20,7 @@ #include "cachelib/allocator/CacheAllocator.h" #include "cachelib/allocator/RebalanceStrategy.h" +#include "cachelib/allocator/BackgroundMoverStrategy.h" #include "cachelib/cachebench/util/JSONConfig.h" #include "cachelib/common/Ticker.h" #include "cachelib/navy/common/Device.h" @@ -78,7 +79,10 @@ struct CacheConfig : public JSONConfig { uint64_t cacheSizeMB{0}; uint64_t poolRebalanceIntervalSec{0}; + uint64_t backgroundEvictorIntervalMilSec{0}; + uint64_t backgroundPromoterIntervalMilSec{0}; std::string rebalanceStrategy; + std::string backgroundEvictorStrategy; uint64_t rebalanceMinSlabs{1}; double rebalanceDiffRatio{0.25}; bool moveOnSlabRelease{false}; @@ -256,6 +260,27 @@ struct CacheConfig : public JSONConfig { // eviction-age is more than this threshold. 0 means no threshold uint32_t nvmAdmissionRetentionTimeThreshold{0}; + // See BackgroundMovers.md for complete description + double promotionAcWatermark{4.0}; + double lowEvictionAcWatermark{2.0}; + double highEvictionAcWatermark{5.0}; + double minAcAllocationWatermark{0.0}; + double maxAcAllocationWatermark{0.0}; + + double numDuplicateElements{0.0}; // inclusivness of the cache + double syncPromotion{0.0}; // can promotion be done synchronously in user thread + + uint64_t evictorThreads{1}; + uint64_t promoterThreads{1}; + + uint64_t maxEvictionBatch{40}; + uint64_t maxPromotionBatch{10}; + + uint64_t minEvictionBatch{5}; + uint64_t minPromotionBatch{5}; + + uint64_t maxEvictionPromotionHotness{60}; + // // Options below are not to be populated with JSON // @@ -287,6 +312,8 @@ struct CacheConfig : public JSONConfig { CacheConfig() {} std::shared_ptr getRebalanceStrategy() const; + std::shared_ptr getBackgroundEvictorStrategy() const; + std::shared_ptr getBackgroundPromoterStrategy() const; }; } // namespace cachebench } // 
namespace cachelib From a47446594fccd7cd7f5c9f341f628dff2fa5e6db Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Mon, 19 Sep 2022 17:28:40 -0400 Subject: [PATCH 02/15] simple tests --- .../tests/AllocatorMemoryTiersTest.cpp | 1 + .../tests/AllocatorMemoryTiersTest.h | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp index d378522b22..78604bc765 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp +++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp @@ -26,6 +26,7 @@ using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest; TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileInvalid) { this->testMultiTiersFormFileInvalid(); } TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileValid) { this->testMultiTiersFromFileValid(); } TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); } +TEST_F(LruAllocatorMemoryTiersTest, MultiTiersBackgroundMovers ) { this->testMultiTiersBackgroundMovers(); } TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsSysVValid) { this->testMultiTiersNumaBindingsSysVValid(); } TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsPosixValid) { this->testMultiTiersNumaBindingsPosixValid(); } diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h index 16e1f88728..9232c859d1 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h +++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h @@ -19,6 +19,8 @@ #include "cachelib/allocator/CacheAllocatorConfig.h" #include "cachelib/allocator/MemoryTierCacheConfig.h" #include "cachelib/allocator/tests/TestBase.h" +#include "cachelib/allocator/FreeThresholdStrategy.h" +#include "cachelib/allocator/PromotionStrategy.h" namespace facebook { namespace cachelib { @@ -62,6 +64,58 @@ class AllocatorMemoryTiersTest : public 
AllocatorTest { ASSERT(handle != nullptr); ASSERT_NO_THROW(alloc->insertOrReplace(handle)); } + + void testMultiTiersBackgroundMovers() { + typename AllocatorT::Config config; + config.setCacheSize(4 * Slab::kSize); + config.enableCachePersistence("/tmp"); + config.usePosixForShm(); + config.configureMemoryTiers({ + MemoryTierCacheConfig::fromShm() + .setRatio(1), + MemoryTierCacheConfig::fromFile("/tmp/b" + std::to_string(::getpid())) + .setRatio(1) + }); + config.enableBackgroundEvictor(std::make_shared(10, 20, 4, 2), + std::chrono::milliseconds(10),1); + config.enableBackgroundPromoter(std::make_shared(5, 4, 2), + std::chrono::milliseconds(10),1); + + auto allocator = std::make_unique(AllocatorT::SharedMemNew, config); + ASSERT(allocator != nullptr); + + const size_t numBytes = allocator->getCacheMemoryStats().cacheSize; + const size_t kItemSize = 100; + auto poolId = allocator->addPool("default", numBytes); + + const int numItems = 10000; + + int numAllocatedItems = 0; + for (unsigned int i = 0; i < numItems; i++) { + auto handle = util::allocateAccessible( + *allocator, poolId, folly::to(i), kItemSize, 0); + ++numAllocatedItems; + } + + ASSERT_GT(numAllocatedItems, 0); + + const unsigned int keyLen = 100; + const unsigned int nSizes = 10; + const auto sizes = + this->getValidAllocSizes(*allocator, poolId, nSizes, keyLen); + this->fillUpPoolUntilEvictions(*allocator, poolId, sizes, keyLen); + + auto stats = allocator->getGlobalCacheStats(); + auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict); + auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote); + + EXPECT_GT(1, stats.evictionStats.numMovedItems); + EXPECT_GT(1, stats.promotionStats.numMovedItems); + + auto cid = 2; + EXPECT_GT(1, perclassEstats[0][0][cid]); + EXPECT_GT(1, perclassPstats[1][0][cid]); + } void testMultiTiersValidMixed() { typename AllocatorT::Config config; From db0aaa4609c3edda716eda3e012b55e42ef60f64 Mon Sep 17 00:00:00 2001 From: 
Daniel Byrne Date: Wed, 21 Sep 2022 18:09:46 -0400 Subject: [PATCH 03/15] format code + rework documentation --- MultiTierDataMovement.md | 28 ++-------- cachelib/allocator/BackgroundMover-inl.h | 54 +++++++++---------- cachelib/allocator/BackgroundMover.h | 53 +++++++++--------- cachelib/allocator/BackgroundMoverStrategy.h | 8 +-- cachelib/allocator/CacheAllocator-inl.h | 2 +- cachelib/allocator/CacheAllocatorConfig.h | 7 +-- cachelib/allocator/FreeThresholdStrategy.cpp | 39 ++++++++------ cachelib/allocator/FreeThresholdStrategy.h | 21 ++++---- cachelib/allocator/PromotionStrategy.h | 57 ++++++++++---------- 9 files changed, 128 insertions(+), 141 deletions(-) diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md index d116f210a0..7fb10d01e6 100644 --- a/MultiTierDataMovement.md +++ b/MultiTierDataMovement.md @@ -12,7 +12,7 @@ thread (green) is integrated to the CacheLib architecture. ## Synchronous Eviction and Promotion -- `disableEvictionToMemory`: Disables eviction to memory (item is always evicted to NVMe or removed +- `disableEviction`: Disables eviction to memory (item is always evicted to NVMe or removed on eviction) ## Background Evictors @@ -23,7 +23,7 @@ strategies and general parameters. - `backgroundEvictorIntervalMilSec`: The interval that this thread runs for - by default the background evictor threads will wake up every 10 ms to scan the AllocationClasses. Also, -the background evictor thead will be woken up everytime there is a failed allocation (from +the background evictor thread will be woken up everytime there is a failed allocation (from a request handling thread) and the current percentage of free memory for the AllocationClass is lower than `lowEvictionAcWatermark`. This may render the interval parameter not as important when there are many allocations occuring from request handling threads. @@ -59,7 +59,7 @@ don't set this above `10`. 
## Background Promoters -The background promotes scan each class to see if there are objects to move to a lower +The background promoters scan each class to see if there are objects to move to a lower tier using a given strategy. Here we document the parameters for the different strategies and general parameters. @@ -93,25 +93,3 @@ This value should correlate with `lowEvictionAcWatermark`, `highEvictionAcWaterm - `maxPromotionBatch`: The number of objects to promote in batch during BG promotion. Analogous to `maxEvictionBatch`. It's value should be lower to decrease contention on hot items. -## Allocation policies - -- `maxAcAllocationWatermark`: Item is always allocated in topmost tier if at least this -percentage of the AllocationClass is free. -- `minAcAllocationWatermark`: Item is always allocated in bottom tier if only this percent -of the AllocationClass is free. If percentage of free AllocationClasses is between `maxAcAllocationWatermark` -and `minAcAllocationWatermark`: then extra checks (described below) are performed to decide where to put the element. - -By default, allocation will always be performed from the upper tier. - -- `acTopTierEvictionWatermark`: If there is less that this percent of free memory in topmost tier, cachelib will attempt to evict from top tier. This option takes precedence before allocationWatermarks. - -### Extra policies (used only when percentage of free AllocationClasses is between `maxAcAllocationWatermark` -and `minAcAllocationWatermark`) -- `sizeThresholdPolicy`: If item is smaller than this value, always allocate it in upper tier. -- `defaultTierChancePercentage`: Change (0-100%) of allocating item in top tier - -## MMContainer options - -- `lruInsertionPointSpec`: Can be set per tier when LRU2Q is used. Determines where new items are -inserted. 
0 = insert to hot queue, 1 = insert to warm queue, 2 = insert to cold queue -- `markUsefulChance`: Per-tier, determines chance of moving item to the head of LRU on access diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h index 04adf4c92d..2b1c55c94e 100644 --- a/cachelib/allocator/BackgroundMover-inl.h +++ b/cachelib/allocator/BackgroundMover-inl.h @@ -17,27 +17,24 @@ namespace facebook { namespace cachelib { - template -BackgroundMover::BackgroundMover(Cache& cache, - std::shared_ptr strategy, - MoverDir direction) - : cache_(cache), - strategy_(strategy), - direction_(direction) -{ - if (direction_ == MoverDir::Evict) { - moverFunc = - BackgroundMoverAPIWrapper::traverseAndEvictItems; - - } else if (direction_ == MoverDir::Promote) { - moverFunc = - BackgroundMoverAPIWrapper::traverseAndPromoteItems; - } +BackgroundMover::BackgroundMover( + Cache& cache, + std::shared_ptr strategy, + MoverDir direction) + : cache_(cache), strategy_(strategy), direction_(direction) { + if (direction_ == MoverDir::Evict) { + moverFunc = BackgroundMoverAPIWrapper::traverseAndEvictItems; + + } else if (direction_ == MoverDir::Promote) { + moverFunc = BackgroundMoverAPIWrapper::traverseAndPromoteItems; + } } template -BackgroundMover::~BackgroundMover() { stop(std::chrono::seconds(0)); } +BackgroundMover::~BackgroundMover() { + stop(std::chrono::seconds(0)); +} template void BackgroundMover::work() { @@ -49,14 +46,14 @@ void BackgroundMover::work() { } template -void BackgroundMover::setAssignedMemory(std::vector> &&assignedMemory) -{ +void BackgroundMover::setAssignedMemory( + std::vector>&& assignedMemory) { XLOG(INFO, "Class assigned to background worker:"); for (auto [tid, pid, cid] : assignedMemory) { XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid); } - mutex.lock_combine([this, &assignedMemory]{ + mutex.lock_combine([this, &assignedMemory] { this->assignedMemory_ = std::move(assignedMemory); }); } @@ -65,31 +62,28 @@ void 
BackgroundMover::setAssignedMemory(std::vector void BackgroundMover::checkAndRun() { - auto assignedMemory = mutex.lock_combine([this]{ - return assignedMemory_; - }); + auto assignedMemory = mutex.lock_combine([this] { return assignedMemory_; }); unsigned int moves = 0; std::set classes{}; - auto batches = strategy_->calculateBatchSizes(cache_,assignedMemory); + auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory); for (size_t i = 0; i < batches.size(); i++) { const auto [tid, pid, cid] = assignedMemory[i]; const auto batch = batches[i]; - + classes.insert(cid); - const auto& mpStats = cache_.getPoolByTid(pid,tid).getStats(); + const auto& mpStats = cache_.getPoolByTid(pid, tid).getStats(); if (!batch) { continue; } - totalBytesMoved.add(batch * mpStats.acStats.at(cid).allocSize); - - //try moving BATCH items from the class in order to reach free target - auto moved = moverFunc(cache_,tid,pid,cid,batch); + // try moving BATCH items from the class in order to reach free target + auto moved = moverFunc(cache_, tid, pid, cid, batch); moves += moved; moves_per_class_[tid][pid][cid] += moved; + totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize); } numTraversals.inc(); diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h index 45593b0f38..77e56a06da 100644 --- a/cachelib/allocator/BackgroundMover.h +++ b/cachelib/allocator/BackgroundMover.h @@ -16,14 +16,13 @@ #pragma once -#include #include +#include -#include "cachelib/allocator/CacheStats.h" -#include "cachelib/common/PeriodicWorker.h" #include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/allocator/CacheStats.h" #include "cachelib/common/AtomicCounter.h" - +#include "cachelib/common/PeriodicWorker.h" namespace facebook { namespace cachelib { @@ -32,23 +31,24 @@ namespace cachelib { // needed for the cache api template struct BackgroundMoverAPIWrapper { - static size_t traverseAndEvictItems(C& cache, - unsigned int tid, unsigned int 
pid, unsigned int cid, size_t batch) { - return cache.traverseAndEvictItems(tid,pid,cid,batch); + unsigned int tid, + unsigned int pid, + unsigned int cid, + size_t batch) { + return cache.traverseAndEvictItems(tid, pid, cid, batch); } - + static size_t traverseAndPromoteItems(C& cache, - unsigned int tid, unsigned int pid, unsigned int cid, size_t batch) { - return cache.traverseAndPromoteItems(tid,pid,cid,batch); + unsigned int tid, + unsigned int pid, + unsigned int cid, + size_t batch) { + return cache.traverseAndPromoteItems(tid, pid, cid, batch); } - }; -enum class MoverDir { - Evict = 0, - Promote -}; +enum class MoverDir { Evict = 0, Promote }; // Periodic worker that evicts items from tiers in batches // The primary aim is to reduce insertion times for new items in the @@ -58,35 +58,40 @@ class BackgroundMover : public PeriodicWorker { public: using Cache = CacheT; // @param cache the cache interface - // @param strategy the stragey class that defines how objects are moved, + // @param strategy the stragey class that defines how objects are + // moved, // (promoted vs. 
evicted and how much) BackgroundMover(Cache& cache, std::shared_ptr strategy, MoverDir direction_); ~BackgroundMover() override; - + BackgroundMoverStats getStats() const noexcept; - std::map>> getClassStats() const noexcept; + std::map>> + getClassStats() const noexcept; - void setAssignedMemory(std::vector> &&assignedMemory); + void setAssignedMemory( + std::vector>&& assignedMemory); private: - std::map>> moves_per_class_; + std::map>> + moves_per_class_; // cache allocator's interface for evicting using Item = typename Cache::Item; - + Cache& cache_; std::shared_ptr strategy_; MoverDir direction_; - - std::function moverFunc; + + std::function + moverFunc; // implements the actual logic of running the background evictor void work() override final; void checkAndRun(); - AtomicCounter numMovedItems{0}; AtomicCounter numTraversals{0}; AtomicCounter totalClasses{0}; diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h index 9a37e8a411..08000c306c 100644 --- a/cachelib/allocator/BackgroundMoverStrategy.h +++ b/cachelib/allocator/BackgroundMoverStrategy.h @@ -23,10 +23,10 @@ namespace cachelib { // Base class for background eviction strategy. 
class BackgroundMoverStrategy { - -public: - virtual std::vector calculateBatchSizes(const CacheBase& cache, - std::vector> acVec) = 0; + public: + virtual std::vector calculateBatchSizes( + const CacheBase& cache, + std::vector> acVec) = 0; }; } // namespace cachelib diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 1c8882ffef..4890b5d3a1 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -427,7 +427,7 @@ CacheAllocator::allocateInternalTier(TierId tid, if (backgroundEvictor_.size() && !fromBgThread && (memory == nullptr || shouldWakeupBgEvictor(tid, pid, cid))) { backgroundEvictor_[backgroundWorkerId(tid, pid, cid, backgroundEvictor_.size())]->wakeUp(); } - // TODO: Today disableEviction means do not evict from memory (DRAM). + // TODO: Today isEvictionDisabled means do not evict from memory (DRAM). // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)? if (memory == nullptr && !config_.isEvictionDisabled()) { memory = findEviction(tid, pid, cid); diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index 3dedf6337c..387ec26096 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -625,11 +625,8 @@ class CacheAllocatorConfig { // CacheAllocator::startCacheWorkers() bool delayCacheWorkersStart{false}; - - double minAcAllocationWatermark{0.0}; - double maxAcAllocationWatermark{0.0}; - - double promotionAcWatermark{4.0}; + // see MultiTierDataMovement.md + double promotionAcWatermark{4.0}; double lowEvictionAcWatermark{2.0}; double highEvictionAcWatermark{5.0}; double numDuplicateElements{0.0}; // inclusivness of the cache diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp index 5ffc718fa7..d4b8bad758 100644 --- a/cachelib/allocator/FreeThresholdStrategy.cpp +++ 
b/cachelib/allocator/FreeThresholdStrategy.cpp @@ -21,13 +21,18 @@ namespace facebook { namespace cachelib { - - -FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, double highEvictionAcWatermark, uint64_t maxEvictionBatch, uint64_t minEvictionBatch) - : lowEvictionAcWatermark(lowEvictionAcWatermark), highEvictionAcWatermark(highEvictionAcWatermark), maxEvictionBatch(maxEvictionBatch), minEvictionBatch(minEvictionBatch) {} +FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, + double highEvictionAcWatermark, + uint64_t maxEvictionBatch, + uint64_t minEvictionBatch) + : lowEvictionAcWatermark(lowEvictionAcWatermark), + highEvictionAcWatermark(highEvictionAcWatermark), + maxEvictionBatch(maxEvictionBatch), + minEvictionBatch(minEvictionBatch) {} std::vector FreeThresholdStrategy::calculateBatchSizes( - const CacheBase& cache, std::vector> acVec) { + const CacheBase& cache, + std::vector> acVec) { std::vector batches{}; for (auto [tid, pid, cid] : acVec) { auto stats = cache.getAllocationClassStats(tid, pid, cid); @@ -35,7 +40,8 @@ std::vector FreeThresholdStrategy::calculateBatchSizes( batches.push_back(0); } else { auto toFreeMemPercent = highEvictionAcWatermark - stats.approxFreePercent; - auto toFreeItems = static_cast(toFreeMemPercent * stats.memorySize / stats.allocSize); + auto toFreeItems = static_cast( + toFreeMemPercent * stats.memorySize / stats.allocSize); batches.push_back(toFreeItems); } } @@ -48,17 +54,18 @@ std::vector FreeThresholdStrategy::calculateBatchSizes( if (maxBatch == 0) return batches; - std::transform(batches.begin(), batches.end(), batches.begin(), [&](auto numItems){ - if (numItems == 0) { - return 0UL; - } + std::transform( + batches.begin(), batches.end(), batches.begin(), [&](auto numItems) { + if (numItems == 0) { + return 0UL; + } - auto cappedBatchSize = maxEvictionBatch * numItems / maxBatch; - if (cappedBatchSize < minEvictionBatch) - return minEvictionBatch; - else - return 
cappedBatchSize; - }); + auto cappedBatchSize = maxEvictionBatch * numItems / maxBatch; + if (cappedBatchSize < minEvictionBatch) + return minEvictionBatch; + else + return cappedBatchSize; + }); return batches; } diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h index babd8935cd..7103771852 100644 --- a/cachelib/allocator/FreeThresholdStrategy.h +++ b/cachelib/allocator/FreeThresholdStrategy.h @@ -16,24 +16,27 @@ #pragma once -#include "cachelib/allocator/Cache.h" #include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/allocator/Cache.h" namespace facebook { namespace cachelib { - // Base class for background mover strategy. class FreeThresholdStrategy : public BackgroundMoverStrategy { - -public: - FreeThresholdStrategy(double lowEvictionAcWatermark, double highEvictionAcWatermark, uint64_t maxEvictionBatch, uint64_t minEvictionBatch); + public: + FreeThresholdStrategy(double lowEvictionAcWatermark, + double highEvictionAcWatermark, + uint64_t maxEvictionBatch, + uint64_t minEvictionBatch); ~FreeThresholdStrategy() {} - std::vector calculateBatchSizes(const CacheBase& cache, - std::vector> acVecs); -private: - double lowEvictionAcWatermark{2.0}; + std::vector calculateBatchSizes( + const CacheBase& cache, + std::vector> acVecs); + + private: + double lowEvictionAcWatermark{2.0}; double highEvictionAcWatermark{5.0}; uint64_t maxEvictionBatch{40}; uint64_t minEvictionBatch{5}; diff --git a/cachelib/allocator/PromotionStrategy.h b/cachelib/allocator/PromotionStrategy.h index ad9145282b..d5b491bea2 100644 --- a/cachelib/allocator/PromotionStrategy.h +++ b/cachelib/allocator/PromotionStrategy.h @@ -16,26 +16,26 @@ #pragma once -#include "cachelib/allocator/Cache.h" #include "cachelib/allocator/BackgroundMoverStrategy.h" +#include "cachelib/allocator/Cache.h" namespace facebook { namespace cachelib { - // Base class for background eviction strategy. 
class PromotionStrategy : public BackgroundMoverStrategy { - -public: - PromotionStrategy(uint64_t promotionAcWatermark, uint64_t maxPromotionBatch, uint64_t minPromotionBatch): - promotionAcWatermark(promotionAcWatermark), maxPromotionBatch(maxPromotionBatch), minPromotionBatch(minPromotionBatch) - { - - } + public: + PromotionStrategy(uint64_t promotionAcWatermark, + uint64_t maxPromotionBatch, + uint64_t minPromotionBatch) + : promotionAcWatermark(promotionAcWatermark), + maxPromotionBatch(maxPromotionBatch), + minPromotionBatch(minPromotionBatch) {} ~PromotionStrategy() {} - std::vector calculateBatchSizes(const CacheBase& cache, - std::vector> acVec) { + std::vector calculateBatchSizes( + const CacheBase& cache, + std::vector> acVec) { std::vector batches{}; for (auto [tid, pid, cid] : acVec) { XDCHECK(tid > 0); @@ -43,35 +43,38 @@ class PromotionStrategy : public BackgroundMoverStrategy { if (stats.approxFreePercent < promotionAcWatermark) batches.push_back(0); else { - auto maxPossibleItemsToPromote = static_cast((promotionAcWatermark - stats.approxFreePercent) * - stats.memorySize / stats.allocSize); + auto maxPossibleItemsToPromote = static_cast( + (promotionAcWatermark - stats.approxFreePercent) * + stats.memorySize / stats.allocSize); batches.push_back(maxPossibleItemsToPromote); } } - if (batches.size() == 0) { - return batches; - } + if (batches.size() == 0) { + return batches; + } auto maxBatch = *std::max_element(batches.begin(), batches.end()); if (maxBatch == 0) return batches; - std::transform(batches.begin(), batches.end(), batches.begin(), [&](auto numItems){ - if (numItems == 0) { - return 0UL; - } + std::transform( + batches.begin(), batches.end(), batches.begin(), [&](auto numItems) { + if (numItems == 0) { + return 0UL; + } - auto cappedBatchSize = maxPromotionBatch * numItems / maxBatch; - if (cappedBatchSize < minPromotionBatch) - return minPromotionBatch; - else - return cappedBatchSize; - }); + auto cappedBatchSize = maxPromotionBatch * 
numItems / maxBatch; + if (cappedBatchSize < minPromotionBatch) + return minPromotionBatch; + else + return cappedBatchSize; + }); return batches; } -private: + + private: double promotionAcWatermark{4.0}; uint64_t maxPromotionBatch{40}; uint64_t minPromotionBatch{5}; From d8c58ff1435dccb5cc5e3844c955b224aee1f432 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 21 Sep 2022 18:33:19 -0400 Subject: [PATCH 04/15] more restrictive test --- cachelib/allocator/tests/AllocatorMemoryTiersTest.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h index 9232c859d1..7e7aa64adc 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h +++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h @@ -115,6 +115,9 @@ class AllocatorMemoryTiersTest : public AllocatorTest { auto cid = 2; EXPECT_GT(1, perclassEstats[0][0][cid]); EXPECT_GT(1, perclassPstats[1][0][cid]); + + auto slabStats = allocator->getAllocationClassStats(0,0,cid); + ASSERT_GE(slabStats.approxFreePercent,10); } void testMultiTiersValidMixed() { From 8d4f1a96cbe6ddbd1329efc9672a8a3c4b678306 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 21 Sep 2022 20:42:53 -0400 Subject: [PATCH 05/15] actually enable background workers + improved test --- cachelib/allocator/CacheAllocator-inl.h | 22 ++++++++++ cachelib/allocator/CacheAllocatorConfig.h | 11 +++++ .../tests/AllocatorMemoryTiersTest.h | 41 ++++++++----------- 3 files changed, 51 insertions(+), 23 deletions(-) diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 4890b5d3a1..0d4ba4f25e 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -302,6 +302,18 @@ void CacheAllocator::initWorkers() { config_.poolOptimizeStrategy, config_.ccacheOptimizeStepSizePercent); } + + if (config_.backgroundEvictorEnabled()) { + 
startNewBackgroundEvictor(config_.backgroundEvictorInterval, + config_.backgroundEvictorStrategy, + config_.backgroundEvictorThreads); + } + + if (config_.backgroundPromoterEnabled()) { + startNewBackgroundPromoter(config_.backgroundPromoterInterval, + config_.backgroundPromoterStrategy, + config_.backgroundPromoterThreads); + } } template @@ -2424,6 +2436,16 @@ PoolId CacheAllocator::addPool( setRebalanceStrategy(pid, std::move(rebalanceStrategy)); setResizeStrategy(pid, std::move(resizeStrategy)); + if (backgroundEvictor_.size()) { + for (size_t id = 0; id < backgroundEvictor_.size(); id++) + backgroundEvictor_[id]->setAssignedMemory(getAssignedMemoryToBgWorker(id, backgroundEvictor_.size(), 0)); + } + + if (backgroundPromoter_.size()) { + for (size_t id = 0; id < backgroundPromoter_.size(); id++) + backgroundPromoter_[id]->setAssignedMemory(getAssignedMemoryToBgWorker(id, backgroundPromoter_.size(), 1)); + } + return pid; } diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index 387ec26096..4060ca2eeb 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -365,6 +365,17 @@ class CacheAllocatorConfig { poolOptimizeStrategy != nullptr; } + // @return whether background evictor thread is enabled + bool backgroundEvictorEnabled() const noexcept { + return backgroundEvictorInterval.count() > 0 && + backgroundEvictorStrategy != nullptr; + } + + bool backgroundPromoterEnabled() const noexcept { + return backgroundPromoterInterval.count() > 0 && + backgroundPromoterStrategy != nullptr; + } + // @return whether memory monitor is enabled bool memMonitoringEnabled() const noexcept { return memMonitorConfig.mode != MemoryMonitor::Disabled && diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h index 7e7aa64adc..044d376d3f 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h +++ 
b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h @@ -67,7 +67,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest { void testMultiTiersBackgroundMovers() { typename AllocatorT::Config config; - config.setCacheSize(4 * Slab::kSize); + config.setCacheSize(10 * Slab::kSize); config.enableCachePersistence("/tmp"); config.usePosixForShm(); config.configureMemoryTiers({ @@ -76,47 +76,42 @@ class AllocatorMemoryTiersTest : public AllocatorTest { MemoryTierCacheConfig::fromFile("/tmp/b" + std::to_string(::getpid())) .setRatio(1) }); - config.enableBackgroundEvictor(std::make_shared(10, 20, 4, 2), + config.enableBackgroundEvictor(std::make_shared(2, 10, 100, 40), std::chrono::milliseconds(10),1); config.enableBackgroundPromoter(std::make_shared(5, 4, 2), std::chrono::milliseconds(10),1); auto allocator = std::make_unique(AllocatorT::SharedMemNew, config); ASSERT(allocator != nullptr); - const size_t numBytes = allocator->getCacheMemoryStats().cacheSize; - const size_t kItemSize = 100; - auto poolId = allocator->addPool("default", numBytes); - - const int numItems = 10000; - - int numAllocatedItems = 0; - for (unsigned int i = 0; i < numItems; i++) { - auto handle = util::allocateAccessible( - *allocator, poolId, folly::to(i), kItemSize, 0); - ++numAllocatedItems; - } - ASSERT_GT(numAllocatedItems, 0); + auto poolId = allocator->addPool("default", numBytes); const unsigned int keyLen = 100; - const unsigned int nSizes = 10; - const auto sizes = - this->getValidAllocSizes(*allocator, poolId, nSizes, keyLen); + std::vector sizes = {100}; this->fillUpPoolUntilEvictions(*allocator, poolId, sizes, keyLen); + + const auto key = this->getRandomNewKey(*allocator, keyLen); + auto handle = util::allocateAccessible(*allocator, poolId, key, sizes[0]); + ASSERT_NE(nullptr, handle); + const uint8_t cid = allocator->getAllocInfo(handle->getMemory()).classId; + + //wait for bg movers + std::this_thread::sleep_for(std::chrono::seconds(1)); auto stats = 
allocator->getGlobalCacheStats(); auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict); auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote); - EXPECT_GT(1, stats.evictionStats.numMovedItems); - EXPECT_GT(1, stats.promotionStats.numMovedItems); + EXPECT_GT(stats.evictionStats.numMovedItems,1); + EXPECT_GT(stats.evictionStats.runCount,1); + EXPECT_GT(stats.promotionStats.numMovedItems,1); - auto cid = 2; - EXPECT_GT(1, perclassEstats[0][0][cid]); - EXPECT_GT(1, perclassPstats[1][0][cid]); + EXPECT_GT(perclassEstats[0][0][cid], 1); + EXPECT_GT(perclassPstats[1][0][cid], 1); auto slabStats = allocator->getAllocationClassStats(0,0,cid); + ASSERT_GE(slabStats.approxFreePercent,10); } From 8de0a1c827b758ed59c7d1ea23d764b928e3678c Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Tue, 4 Oct 2022 19:38:34 -0400 Subject: [PATCH 06/15] fixed memory tiers code block in config --- cachelib/cachebench/util/CacheConfig.cpp | 5 +++++ cachelib/external/fbthrift | 2 +- cachelib/external/fizz | 2 +- cachelib/external/folly | 2 +- cachelib/external/wangle | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cachelib/cachebench/util/CacheConfig.cpp b/cachelib/cachebench/util/CacheConfig.cpp index 8b3ef99d4d..929913cafe 100644 --- a/cachelib/cachebench/util/CacheConfig.cpp +++ b/cachelib/cachebench/util/CacheConfig.cpp @@ -117,6 +117,11 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) { JSONSetVal(configJson, minPromotionBatch); JSONSetVal(configJson, maxEvictionPromotionHotness); + if (configJson.count("memoryTiers")) { + for (auto& it : configJson["memoryTiers"]) { + memoryTierConfigs.push_back(MemoryTierConfig(it).getMemoryTierCacheConfig()); + } + } // if you added new fields to the configuration, update the JSONSetVal // to make them available for the json configs and increment the size // below diff --git a/cachelib/external/fbthrift b/cachelib/external/fbthrift index 74f3a8fb00..5048711276 160000 
--- a/cachelib/external/fbthrift +++ b/cachelib/external/fbthrift @@ -1 +1 @@ -Subproject commit 74f3a8fb00e3963d1e6f2361fce722688d500417 +Subproject commit 5048711276b5f81f46ec874ff5a5bbeabc25b069 diff --git a/cachelib/external/fizz b/cachelib/external/fizz index 54ee5c546a..e6adace244 160000 --- a/cachelib/external/fizz +++ b/cachelib/external/fizz @@ -1 +1 @@ -Subproject commit 54ee5c546a3f1dfd324900560c3a0a79709d3f98 +Subproject commit e6adace24441bf14758d9326efce5974623867cf diff --git a/cachelib/external/folly b/cachelib/external/folly index 0682178745..d41d59bf3d 160000 --- a/cachelib/external/folly +++ b/cachelib/external/folly @@ -1 +1 @@ -Subproject commit 0682178745e25aa739a9fccee44e7475dedd03f0 +Subproject commit d41d59bf3dbf4211a889c4447da3d61db3db61e4 diff --git a/cachelib/external/wangle b/cachelib/external/wangle index fd7d5a8f10..2a4cc34377 160000 --- a/cachelib/external/wangle +++ b/cachelib/external/wangle @@ -1 +1 @@ -Subproject commit fd7d5a8f10670d9edbb5c1135c514ad1733e2707 +Subproject commit 2a4cc34377d16ee0509747d105f01185ac8336d4 From f07f668e0ce22bd079b5c49b25a33132f08ebcb4 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 5 Oct 2022 16:40:39 -0400 Subject: [PATCH 07/15] stop workers --- cachelib/allocator/CacheAllocator-inl.h | 22 ++++++++++++++++++++++ cachelib/allocator/CacheAllocator.h | 2 ++ 2 files changed, 24 insertions(+) diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 0d4ba4f25e..52f8a9412d 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -3521,6 +3521,8 @@ bool CacheAllocator::stopWorkers(std::chrono::seconds timeout) { success &= stopPoolResizer(timeout); success &= stopMemMonitor(timeout); success &= stopReaper(timeout); + success &= stopBackgroundEvictor(timeout); + success &= stopBackgroundPromoter(timeout); return success; } @@ -4021,6 +4023,26 @@ bool CacheAllocator::stopReaper(std::chrono::seconds timeout) { 
return stopWorker("Reaper", reaper_, timeout); } +template +bool CacheAllocator::stopBackgroundEvictor(std::chrono::seconds timeout) { + bool result = true; + for (size_t i = 0; i < backgroundEvictor_.size(); i++) { + auto ret = stopWorker("BackgroundEvictor", backgroundEvictor_[i], timeout); + result = result && ret; + } + return result; +} + +template +bool CacheAllocator::stopBackgroundPromoter(std::chrono::seconds timeout) { + bool result = true; + for (size_t i = 0; i < backgroundPromoter_.size(); i++) { + auto ret = stopWorker("BackgroundPromoter", backgroundPromoter_[i], timeout); + result = result && ret; + } + return result; +} + template bool CacheAllocator::cleanupStrayShmSegments( const std::string& cacheDir, bool posix /*TODO(SHM_FILE): const std::vector& config */) { diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index a65f14d42c..802abb0115 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -1024,6 +1024,8 @@ class CacheAllocator : public CacheBase { 0}); bool stopMemMonitor(std::chrono::seconds timeout = std::chrono::seconds{0}); bool stopReaper(std::chrono::seconds timeout = std::chrono::seconds{0}); + bool stopBackgroundEvictor(std::chrono::seconds timeout = std::chrono::seconds{0}); + bool stopBackgroundPromoter(std::chrono::seconds timeout = std::chrono::seconds{0}); // Set pool optimization to either true or false // From 7eef3967c52ba83196838bba1193acd4bc11ce17 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 5 Oct 2022 16:57:02 -0400 Subject: [PATCH 08/15] fixed test --- cachelib/allocator/tests/AllocatorMemoryTiersTest.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h index 044d376d3f..f2f7b231fe 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h +++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h @@ 
-96,10 +96,13 @@ class AllocatorMemoryTiersTest : public AllocatorTest { ASSERT_NE(nullptr, handle); const uint8_t cid = allocator->getAllocInfo(handle->getMemory()).classId; - //wait for bg movers - std::this_thread::sleep_for(std::chrono::seconds(1)); auto stats = allocator->getGlobalCacheStats(); + while (stats.evictionStats.runCount < 1) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + stats = allocator->getGlobalCacheStats(); + } + auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict); auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote); From b31c5adcb19a9d315a4a818a8238f6eec0c1d3c8 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 5 Oct 2022 17:39:22 -0400 Subject: [PATCH 09/15] a better test --- .../tests/AllocatorMemoryTiersTest.h | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h index f2f7b231fe..9501a2a6e0 100644 --- a/cachelib/allocator/tests/AllocatorMemoryTiersTest.h +++ b/cachelib/allocator/tests/AllocatorMemoryTiersTest.h @@ -94,28 +94,30 @@ class AllocatorMemoryTiersTest : public AllocatorTest { const auto key = this->getRandomNewKey(*allocator, keyLen); auto handle = util::allocateAccessible(*allocator, poolId, key, sizes[0]); ASSERT_NE(nullptr, handle); + const uint8_t cid = allocator->getAllocInfo(handle->getMemory()).classId; - - auto stats = allocator->getGlobalCacheStats(); - while (stats.evictionStats.runCount < 1) { + auto slabStats = allocator->getAllocationClassStats(0,0,cid); + const auto& mpStats = allocator->getPoolByTid(poolId, 0).getStats(); + //cache is 10MB should move about 1MB to reach 10% free + uint32_t approxEvict = (1024*1024)/mpStats.acStats.at(cid).allocSize; + while (stats.evictionStats.numMovedItems < approxEvict*0.95 && slabStats.approxFreePercent >= 9.5) { std::this_thread::sleep_for(std::chrono::seconds(1)); 
stats = allocator->getGlobalCacheStats(); + slabStats = allocator->getAllocationClassStats(0,0,cid); } + ASSERT_GE(slabStats.approxFreePercent,9.5); auto perclassEstats = allocator->getBackgroundMoverClassStats(MoverDir::Evict); auto perclassPstats = allocator->getBackgroundMoverClassStats(MoverDir::Promote); - EXPECT_GT(stats.evictionStats.numMovedItems,1); - EXPECT_GT(stats.evictionStats.runCount,1); - EXPECT_GT(stats.promotionStats.numMovedItems,1); + ASSERT_GE(stats.evictionStats.numMovedItems,1); + ASSERT_GE(stats.evictionStats.runCount,1); + ASSERT_GE(stats.promotionStats.numMovedItems,1); - EXPECT_GT(perclassEstats[0][0][cid], 1); - EXPECT_GT(perclassPstats[1][0][cid], 1); + ASSERT_GE(perclassEstats[0][0][cid], 1); + ASSERT_GE(perclassPstats[1][0][cid], 1); - auto slabStats = allocator->getAllocationClassStats(0,0,cid); - - ASSERT_GE(slabStats.approxFreePercent,10); } void testMultiTiersValidMixed() { From 1efa0468cba0f0e0a8bb659b16815b4c3e6b3730 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 6 Oct 2022 07:54:24 -0400 Subject: [PATCH 10/15] remove extra headers --- cachelib/allocator/BackgroundMover.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h index 77e56a06da..5538561e11 100644 --- a/cachelib/allocator/BackgroundMover.h +++ b/cachelib/allocator/BackgroundMover.h @@ -16,9 +16,6 @@ #pragma once -#include -#include - #include "cachelib/allocator/BackgroundMoverStrategy.h" #include "cachelib/allocator/CacheStats.h" #include "cachelib/common/AtomicCounter.h" From dc7f5defec0b04f466c50eaf715802464c3b5336 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 6 Oct 2022 07:56:17 -0400 Subject: [PATCH 11/15] revert fbthrift dep --- cachelib/external/fbthrift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cachelib/external/fbthrift b/cachelib/external/fbthrift index 5048711276..74f3a8fb00 160000 --- a/cachelib/external/fbthrift +++ 
b/cachelib/external/fbthrift @@ -1 +1 @@ -Subproject commit 5048711276b5f81f46ec874ff5a5bbeabc25b069 +Subproject commit 74f3a8fb00e3963d1e6f2361fce722688d500417 From f40e8c0e80a5a85ef16b54fa1ad149661a095951 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 6 Oct 2022 08:05:21 -0400 Subject: [PATCH 12/15] fix fizz --- cachelib/external/fizz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cachelib/external/fizz b/cachelib/external/fizz index e6adace244..54ee5c546a 160000 --- a/cachelib/external/fizz +++ b/cachelib/external/fizz @@ -1 +1 @@ -Subproject commit e6adace24441bf14758d9326efce5974623867cf +Subproject commit 54ee5c546a3f1dfd324900560c3a0a79709d3f98 From 72bfe1f4287f1b287e6206d3502593dc59758fd3 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 6 Oct 2022 08:10:37 -0400 Subject: [PATCH 13/15] fix folly --- cachelib/external/folly | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cachelib/external/folly b/cachelib/external/folly index d41d59bf3d..0682178745 160000 --- a/cachelib/external/folly +++ b/cachelib/external/folly @@ -1 +1 @@ -Subproject commit d41d59bf3dbf4211a889c4447da3d61db3db61e4 +Subproject commit 0682178745e25aa739a9fccee44e7475dedd03f0 From d16e6065906c233e5fc5a1ca7d7b53425ea06041 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 6 Oct 2022 08:11:45 -0400 Subject: [PATCH 14/15] fix wangle --- cachelib/external/wangle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cachelib/external/wangle b/cachelib/external/wangle index 2a4cc34377..fd7d5a8f10 160000 --- a/cachelib/external/wangle +++ b/cachelib/external/wangle @@ -1 +1 @@ -Subproject commit 2a4cc34377d16ee0509747d105f01185ac8336d4 +Subproject commit fd7d5a8f10670d9edbb5c1135c514ad1733e2707 From 2d7b9582603fde5879bc257a42205d5bdaf5d19f Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Thu, 20 Oct 2022 16:20:12 -0400 Subject: [PATCH 15/15] updated per review comments --- MultiTierDataMovement.md | 5 ----- 
cachelib/allocator/BackgroundMover-inl.h | 2 +- cachelib/allocator/BackgroundMover.h | 4 ++-- cachelib/allocator/BackgroundMoverStrategy.h | 11 ++++++++++- cachelib/allocator/CacheAllocator-inl.h | 2 +- cachelib/allocator/FreeThresholdStrategy.cpp | 2 +- cachelib/allocator/FreeThresholdStrategy.h | 2 +- cachelib/allocator/PromotionStrategy.h | 2 +- 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/MultiTierDataMovement.md b/MultiTierDataMovement.md index 7fb10d01e6..cccc14b947 100644 --- a/MultiTierDataMovement.md +++ b/MultiTierDataMovement.md @@ -10,11 +10,6 @@ thread (green) is integrated to the CacheLib architecture. BackgroundEvictor

-## Synchronous Eviction and Promotion - -- `disableEviction`: Disables eviction to memory (item is always evicted to NVMe or removed -on eviction) - ## Background Evictors The background evictors scan each class to see if there are objects to move the next (lower) diff --git a/cachelib/allocator/BackgroundMover-inl.h b/cachelib/allocator/BackgroundMover-inl.h index 2b1c55c94e..b77436635f 100644 --- a/cachelib/allocator/BackgroundMover-inl.h +++ b/cachelib/allocator/BackgroundMover-inl.h @@ -47,7 +47,7 @@ void BackgroundMover::work() { template void BackgroundMover::setAssignedMemory( - std::vector>&& assignedMemory) { + std::vector&& assignedMemory) { XLOG(INFO, "Class assigned to background worker:"); for (auto [tid, pid, cid] : assignedMemory) { XLOGF(INFO, "Tid: {}, Pid: {}, Cid: {}", tid, pid, cid); diff --git a/cachelib/allocator/BackgroundMover.h b/cachelib/allocator/BackgroundMover.h index 5538561e11..1246676d6e 100644 --- a/cachelib/allocator/BackgroundMover.h +++ b/cachelib/allocator/BackgroundMover.h @@ -69,7 +69,7 @@ class BackgroundMover : public PeriodicWorker { getClassStats() const noexcept; void setAssignedMemory( - std::vector>&& assignedMemory); + std::vector&& assignedMemory); private: std::map>> @@ -94,7 +94,7 @@ class BackgroundMover : public PeriodicWorker { AtomicCounter totalClasses{0}; AtomicCounter totalBytesMoved{0}; - std::vector> assignedMemory_; + std::vector assignedMemory_; folly::DistributedMutex mutex; }; } // namespace cachelib diff --git a/cachelib/allocator/BackgroundMoverStrategy.h b/cachelib/allocator/BackgroundMoverStrategy.h index 08000c306c..7706a625a5 100644 --- a/cachelib/allocator/BackgroundMoverStrategy.h +++ b/cachelib/allocator/BackgroundMoverStrategy.h @@ -18,15 +18,24 @@ #include "cachelib/allocator/Cache.h" + namespace facebook { namespace cachelib { +struct MemoryDescriptorType { + MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid) : + tid_(tid), pid_(pid), cid_(cid) {} + TierId tid_; + PoolId pid_; + 
ClassId cid_; +}; + // Base class for background eviction strategy. class BackgroundMoverStrategy { public: virtual std::vector calculateBatchSizes( const CacheBase& cache, - std::vector> acVec) = 0; + std::vector acVec) = 0; }; } // namespace cachelib diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 52f8a9412d..7b0f1de992 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -3941,7 +3941,7 @@ bool CacheAllocator::startNewReaper( template auto CacheAllocator::getAssignedMemoryToBgWorker(size_t evictorId, size_t numWorkers, TierId tid) { - std::vector> asssignedMemory; + std::vector asssignedMemory; // TODO: for now, only evict from tier 0 auto pools = filterCompactCachePools(allocator_[tid]->getPoolIds()); for (const auto pid : pools) { diff --git a/cachelib/allocator/FreeThresholdStrategy.cpp b/cachelib/allocator/FreeThresholdStrategy.cpp index d4b8bad758..1311d678fb 100644 --- a/cachelib/allocator/FreeThresholdStrategy.cpp +++ b/cachelib/allocator/FreeThresholdStrategy.cpp @@ -32,7 +32,7 @@ FreeThresholdStrategy::FreeThresholdStrategy(double lowEvictionAcWatermark, std::vector FreeThresholdStrategy::calculateBatchSizes( const CacheBase& cache, - std::vector> acVec) { + std::vector acVec) { std::vector batches{}; for (auto [tid, pid, cid] : acVec) { auto stats = cache.getAllocationClassStats(tid, pid, cid); diff --git a/cachelib/allocator/FreeThresholdStrategy.h b/cachelib/allocator/FreeThresholdStrategy.h index 7103771852..94316bfe82 100644 --- a/cachelib/allocator/FreeThresholdStrategy.h +++ b/cachelib/allocator/FreeThresholdStrategy.h @@ -33,7 +33,7 @@ class FreeThresholdStrategy : public BackgroundMoverStrategy { std::vector calculateBatchSizes( const CacheBase& cache, - std::vector> acVecs); + std::vector acVecs); private: double lowEvictionAcWatermark{2.0}; diff --git a/cachelib/allocator/PromotionStrategy.h b/cachelib/allocator/PromotionStrategy.h index 
d5b491bea2..2347def5f9 100644 --- a/cachelib/allocator/PromotionStrategy.h +++ b/cachelib/allocator/PromotionStrategy.h @@ -35,7 +35,7 @@ class PromotionStrategy : public BackgroundMoverStrategy { std::vector calculateBatchSizes( const CacheBase& cache, - std::vector> acVec) { + std::vector acVec) { std::vector batches{}; for (auto [tid, pid, cid] : acVec) { XDCHECK(tid > 0);