
Commit 3181c63

tier admission
item movement test updated
class in test
use transparent sync for item movement
remove extra whitespace
updates per comments
updates per comments (still outstanding on the todos)
1 parent 829a434 commit 3181c63

19 files changed: +356 -32 lines changed

cachelib/allocator/Cache.h

+2 -1

@@ -93,10 +93,11 @@ class CacheBase {
   virtual bool isObjectCache() const = 0;
 
   // Get the reference to a memory pool, for stats purposes
+  // uses the top most memory tier by default
   //
   // @param poolId The pool id to query
   virtual const MemoryPool& getPool(PoolId poolId) const = 0;
-
+
   // Get Pool specific stats (regular pools). This includes stats from the
   // Memory Pool and also the cache.
   //

cachelib/allocator/CacheAllocator-inl.h

+88 -10
@@ -405,9 +405,22 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
   // TODO: per-tier
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_[tid]->allocate(pid, requiredSize);
+  void* memory = nullptr;
+
+  if (tid == 0 && config_.acTopTierEvictionWatermark > 0.0
+      && getAllocationClassStats(tid, pid, cid)
+                 .approxFreePercent < config_.acTopTierEvictionWatermark) {
+    memory = findEviction(tid, pid, cid);
+  }
+
+  if (memory == nullptr) {
+    // TODO: should we try to allocate the item even if this will result in
+    // violating acTopTierEvictionWatermark?
+    memory = allocator_[tid]->allocate(pid, requiredSize);
+  }
 
   // TODO: Today disableEviction means do not evict from memory (DRAM).
-  // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)?
+  // Should we support eviction between memory tiers (e.g. from DRAM to the next tier)?
   if (memory == nullptr && !config_.isEvictionDisabled()) {
     memory = findEviction(tid, pid, cid);
   }
@@ -448,19 +461,71 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
   return handle;
 }
 
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTargetTierForItem(PoolId pid,
+                                                 typename Item::Key key,
+                                                 uint32_t size,
+                                                 uint32_t creationTime,
+                                                 uint32_t expiryTime) {
+  if (getNumTiers() == 1)
+    return 0;
+
+  if (config_.forceAllocationTier != UINT64_MAX) {
+    return config_.forceAllocationTier;
+  }
+
+  const TierId defaultTargetTier = 0;
+
+  const auto requiredSize = Item::getRequiredSize(key, size);
+  const auto cid = allocator_[defaultTargetTier]->getAllocationClassId(pid, requiredSize);
+
+  auto freePercentage = getAllocationClassStats(defaultTargetTier, pid, cid).approxFreePercent;
+
+  // TODO: could we implement a BG worker which would move slabs around
+  // so that there is a similar amount of free space in each pool/AC?
+  // Should this be the responsibility of the BG evictor?
+
+  if (freePercentage >= config_.maxAcAllocationWatermark)
+    return defaultTargetTier;
+
+  if (freePercentage <= config_.minAcAllocationWatermark)
+    return defaultTargetTier + 1;
+
+  // TODO: we could even create different allocation classes for different tiers
+  // and look at possible fragmentation when deciding where to put the item
+  if (config_.sizeThresholdPolicy)
+    return requiredSize < config_.sizeThresholdPolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO: (e.g. always put chained items to PMEM)
+  // if (chainedItemsPolicy)
+  //   return item.isChainedItem() ? defaultTargetTier + 1 : defaultTargetTier;
+
+  // TODO:
+  // if (expiryTimePolicy)
+  //   return (expiryTime - creationTime) < expiryTimePolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO:
+  // if (keyPolicy) // this can be based on key length or some other properties
+  //   return getTargetTierForKey(key);
+
+  // TODO:
+  // if (compressibilityPolicy) // if it compresses well, store it in PMEM? latency will be higher anyway
+  //   return TODO;
+
+  // TODO: this only works for 2 tiers
+  return (folly::Random::rand32() % 100) < config_.defaultTierChancePercentage
+             ? defaultTargetTier
+             : defaultTargetTier + 1;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
                                              uint32_t expiryTime) {
-  auto tid = 0; /* TODO: consult admission policy */
-  for (TierId tid = 0; tid < getNumTiers(); ++tid) {
-    auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
-    if (handle) return handle;
-  }
-  return {};
+  auto tid = getTargetTierForItem(pid, key, size, creationTime, expiryTime);
+  return allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
 }
 
 template <typename CacheTrait>
@@ -1636,6 +1701,13 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
   return true;
 }
 
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::shouldEvictToNextMemoryTier(
+    TierId sourceTierId, TierId targetTierId, PoolId pid, Item& item) {
+  return !config_.disableEvictionToMemory;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
@@ -1649,8 +1721,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
     if (handle) { return handle; }
   }
 
-  TierId nextTier = tid; // TODO - calculate this based on some admission policy
+  TierId nextTier = tid;
   while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
+    if (!shouldEvictToNextMemoryTier(tid, nextTier, pid, item))
+      continue;
     // allocateInternal might trigger another eviction
     auto newItemHdl = allocateInternalTier(nextTier, pid,
                                            item.getKey(),
@@ -2431,6 +2505,10 @@ void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
                                 .getAllocsPerSlab()
                           : 0);
   for (TierId tid = 0; tid < getNumTiers(); tid++) {
+    if constexpr (std::is_same_v<MMConfig, MMLru::Config> || std::is_same_v<MMConfig, MM2Q::Config>) {
+      config.lruInsertionPointSpec = config_.memoryTierConfigs[tid].lruInsertionPointSpec;
+      config.markUsefulChance = config_.memoryTierConfigs[tid].markUsefulChance;
+    }
     mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
   }
 }
@@ -2485,7 +2563,7 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
   folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_);
   // TODO - get rid of the duplication - right now, each tier
   // holds pool objects with mostly the same info
-  return filterCompactCachePools(allocator_[0]->getPoolIds());
+  return filterCompactCachePools(allocator_[currentTier()]->getPoolIds());
 }
 
 template <typename CacheTrait>

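The policies in the new getTargetTierForItem() apply in a fixed order: forced tier, then the two per-AC free-space watermarks, then the optional size threshold, and finally a random split. A minimal, self-contained sketch of that decision order for a two-tier setup (hypothetical names and inputs, not the committed code):

#include <cstdint>
#include <random>

// Hypothetical stand-in for the knobs read from CacheAllocatorConfig and for
// the per-allocation-class free percentage; tier 0 is the top tier.
struct AdmissionKnobs {
  uint64_t forceAllocationTier = UINT64_MAX;  // UINT64_MAX means "not forced"
  double maxAcAllocationWatermark = 0.0;      // free% >= this: stay in tier 0
  double minAcAllocationWatermark = 0.0;      // free% <= this: spill to tier 1
  uint64_t sizeThresholdPolicy = 0;           // 0 disables the size policy
  double defaultTierChancePercentage = 50.0;  // fallback random split
};

int pickTier(const AdmissionKnobs& k, double acFreePercent, uint32_t requiredSize) {
  if (k.forceAllocationTier != UINT64_MAX)
    return static_cast<int>(k.forceAllocationTier);
  if (acFreePercent >= k.maxAcAllocationWatermark)
    return 0;
  if (acFreePercent <= k.minAcAllocationWatermark)
    return 1;
  if (k.sizeThresholdPolicy)
    return requiredSize < k.sizeThresholdPolicy ? 0 : 1;
  static std::mt19937 gen{std::random_device{}()};
  return std::uniform_int_distribution<int>(0, 99)(gen) < k.defaultTierChancePercentage ? 0 : 1;
}

Note that with the default knobs (both watermarks at 0.0), every allocation still lands in tier 0, so the default behavior is unchanged.
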
cachelib/allocator/CacheAllocator.h

+8

@@ -1287,6 +1287,11 @@ class CacheAllocator : public CacheBase {
   // @param types the type of the memory used
   // @param config the configuration for the whole cache allocator
   CacheAllocator(InitMemType types, Config config);
+
+  TierId getTargetTierForItem(PoolId pid, typename Item::Key key,
+                              uint32_t size,
+                              uint32_t creationTime,
+                              uint32_t expiryTime);
 
   // This is the last step in item release. We also use this for the eviction
   // scenario where we have to do everything, but not release the allocation
@@ -1678,6 +1683,9 @@ class CacheAllocator : public CacheBase {
   // handle to the item. On failure an empty handle.
   WriteHandle tryEvictToNextMemoryTier(Item& item);
 
+  bool shouldEvictToNextMemoryTier(TierId sourceTierId,
+                                   TierId targetTierId, PoolId pid, Item& item);
+
   size_t memoryTierSize(TierId tid) const;
 
   // Deserializer CacheAllocatorMetadata and verify the version

cachelib/allocator/CacheAllocatorConfig.h

+8

@@ -603,6 +603,14 @@
   // If true, we will delay worker start until user explicitly calls
   // CacheAllocator::startCacheWorkers()
   bool delayCacheWorkersStart{false};
+  bool disableEvictionToMemory{false};
+
+  double minAcAllocationWatermark{0.0};  // if % free in the AC is <= this, try to allocate in the next tier
+  double maxAcAllocationWatermark{0.0};  // if % free in the AC is >= this, allocate in the default target tier
+  double acTopTierEvictionWatermark{0.0};  // evict from the 1st tier if % free is < this watermark
+  uint64_t sizeThresholdPolicy{0};  // only allow items < this threshold in the top tier
+  double defaultTierChancePercentage{50.0};  // otherwise, randomly allocate items among top and bottom tiers
+  uint64_t forceAllocationTier{UINT64_MAX};  // force allocations to happen in this tier
 
   friend CacheT;

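Since these are plain public fields (like delayCacheWorkersStart just above), they would presumably be set directly on the config object. A hedged, illustrative sketch with made-up values, assuming the usual LruAllocator::Config alias:

// Illustrative only: keep new items in tier 0 while its allocation classes are
// at least 10% free, spill to tier 1 once they fall to 5% or below, start
// watermark-driven tier-0 eviction under 2% free, and route items of 2KB or
// larger to the lower tier.
LruAllocator::Config config;
config.maxAcAllocationWatermark = 10.0;
config.minAcAllocationWatermark = 5.0;
config.acTopTierEvictionWatermark = 2.0;
config.sizeThresholdPolicy = 2048;
config.defaultTierChancePercentage = 75.0;  // fallback: 75% chance of tier 0
// config.forceAllocationTier = 1;          // or pin every allocation to one tier
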
cachelib/allocator/MM2Q-inl.h

+28 -5

@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <folly/Random.h>
+
 namespace facebook {
 namespace cachelib {
 
@@ -104,6 +106,10 @@ bool MM2Q::Container<T, HookPtr>::recordAccess(T& node,
       return false;
     }
 
+    // TODO: % 100 is not very accurate
+    if (config_.markUsefulChance < 100.0 &&
+        folly::Random::rand32() % 100 >= config_.markUsefulChance)
+      return false;
+
     return lruMutex_->lock_combine(func);
   }
   return false;
@@ -223,15 +229,32 @@ void MM2Q::Container<T, HookPtr>::rebalance() noexcept {
 template <typename T, MM2Q::Hook<T> T::*HookPtr>
 bool MM2Q::Container<T, HookPtr>::add(T& node) noexcept {
   const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
-  return lruMutex_->lock_combine([this, &node, currTime]() {
+
+  auto insertToList = [this, &node] {
+    if (config_.lruInsertionPointSpec == 0) {
+      markHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Hot).linkAtHead(node);
+    } else if (config_.lruInsertionPointSpec == 1) {
+      unmarkHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Warm).linkAtHead(node);
+    } else {
+      unmarkHot(node);
+      markCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Cold).linkAtHead(node);
+    }
+  };
+
+  return lruMutex_->lock_combine([this, &node, currTime, &insertToList]() {
    if (node.isInMMContainer()) {
      return false;
    }
 
-    markHot(node);
-    unmarkCold(node);
-    unmarkTail(node);
-    lru_.getList(LruType::Hot).linkAtHead(node);
+    insertToList();
    rebalance();
 
    node.markInMMContainer();

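The markUsefulChance check above gates LRU promotion probabilistically: an access updates the container with probability markUsefulChance / 100, so the default of 100.0 preserves today's behavior. A minimal sketch of that gate as a standalone helper (hypothetical, not part of the commit):

#include <folly/Random.h>

// Returns true when an access should be recorded in the MMContainer,
// mirroring the MM2Q check above.
inline bool shouldRecordAccess(double markUsefulChance) {
  return markUsefulChance >= 100.0 ||
         folly::Random::rand32() % 100 < markUsefulChance;
}

As the TODO notes, rand32() % 100 is slightly biased toward low values; a uniform helper such as folly::Random::randDouble01() could avoid that if the precision ever matters.
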
cachelib/allocator/MM2Q.h

+2

@@ -306,6 +306,8 @@ class MM2Q {
     // Minimum interval between reconfigurations. If 0, reconfigure is never
     // called.
     std::chrono::seconds mmReconfigureIntervalSecs{};
+    double markUsefulChance{100.0};
+    uint8_t lruInsertionPointSpec{0};
   };
 
   // The container object which can be used to keep track of objects of type

cachelib/allocator/MMLru-inl.h

+6

@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <folly/Random.h>
+
 namespace facebook {
 namespace cachelib {
 namespace detail {
@@ -87,6 +89,10 @@ bool MMLru::Container<T, HookPtr>::recordAccess(T& node,
       return false;
     }
 
+    // TODO: % 100 is not very accurate
+    if (config_.markUsefulChance < 100.0 &&
+        folly::Random::rand32() % 100 >= config_.markUsefulChance)
+      return false;
+
     lruMutex_->lock_combine(func);
     return true;
   }

cachelib/allocator/MMLru.h

+2

@@ -189,6 +189,8 @@ class MMLru {
     // access. If set, and tryLock fails, access will not result in promotion.
     bool tryLockUpdate{false};
 
+    double markUsefulChance{100.0};
+
     // By default insertions happen at the head of the LRU. If we need
     // insertions at the middle of lru we can adjust this to be a non-zero.
     // Ex: lruInsertionPointSpec = 1, we insert at the middle (1/2 from end)

cachelib/allocator/MMTinyLFU-inl.h

-1

@@ -228,7 +228,6 @@ MMTinyLFU::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   fun(Iterator{LockHolder{}, *this});
 }
 
-
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
 void MMTinyLFU::Container<T, HookPtr>::removeLocked(T& node) noexcept {
   if (isTiny(node)) {

cachelib/allocator/MemoryTierCacheConfig.h

+4

@@ -81,6 +81,10 @@
 
   const ShmTypeOpts& getShmTypeOpts() const noexcept { return shmOpts; }
 
+  // TODO: move these to the MMContainer config
+  double markUsefulChance{100.0};   // call markUseful only with this chance
+  uint8_t lruInsertionPointSpec{0}; // see the LRU/2Q description (possible values vary)
+
  private:
   // Ratio is a number of parts of the total cache size to be allocated for this
   // tier. E.g. if X is a total cache size, Yi are ratios specified for memory

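A sketch of per-tier usage, assuming the multi-tier fork's MemoryTierCacheConfig::fromShm()/setRatio() builders and CacheAllocatorConfig::configureMemoryTiers() (config here is the CacheAllocatorConfig from the earlier sketch); the new fields are public members, so they are set directly, and the values are illustrative:

auto tier0 = MemoryTierCacheConfig::fromShm().setRatio(1);  // top tier: defaults
auto tier1 = MemoryTierCacheConfig::fromShm().setRatio(1);  // lower tier
tier1.markUsefulChance = 10.0;    // record only ~10% of accesses in the lower tier
tier1.lruInsertionPointSpec = 1;  // insert lower-tier items mid-LRU (Warm for 2Q)
config.configureMemoryTiers({tier0, tier1});
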
cachelib/allocator/memory/MemoryAllocator.h

+2 -1

@@ -660,7 +660,8 @@
            && ptr < slabAllocator_.getSlabMemoryEnd();
   }
 
- private:
+  // TODO:
+  // private:
   // @param memory pointer to the memory.
   // @return the MemoryPool corresponding to the memory.
   // @throw std::invalid_argument if the memory does not belong to any active

cachelib/allocator/memory/MemoryAllocatorStats.h

+4

@@ -54,6 +54,10 @@ struct ACStats {
   constexpr size_t getTotalFreeMemory() const noexcept {
     return Slab::kSize * freeSlabs + freeAllocs * allocSize;
   }
+
+  constexpr size_t getTotalMemory() const noexcept {
+    return activeAllocs * allocSize;
+  }
 };
 
 // structure to query stats corresponding to a MemoryPool

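getTotalMemory() counts only active allocations (activeAllocs * allocSize); paired with getTotalFreeMemory() it can back a rough utilization figure. A small hypothetical helper, not part of the commit:

// Approximate fraction of an allocation class' memory that is in active use,
// built from the two ACStats accessors above; returns 0 for an empty class.
inline double approxUsedFraction(const ACStats& stats) {
  const double total =
      static_cast<double>(stats.getTotalMemory() + stats.getTotalFreeMemory());
  return total > 0.0 ? static_cast<double>(stats.getTotalMemory()) / total : 0.0;
}
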
cachelib/allocator/memory/MemoryPool.h

+2 -1

@@ -308,7 +308,8 @@
   // @param value new value for the curSlabsAdvised_
   void setNumSlabsAdvised(uint64_t value) { curSlabsAdvised_ = value; }
 
- private:
+  // TODO:
+  // private:
   // container for storing a vector of AllocationClass.
   using ACVector = std::vector<std::unique_ptr<AllocationClass>>;

cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp

+3

@@ -28,6 +28,9 @@ TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileValid) { this->testMultiTi
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsSysVValid) { this->testMultiTiersNumaBindingsSysVValid(); }
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsPosixValid) { this->testMultiTiersNumaBindingsPosixValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersForceTierAllocation) { this->testMultiTiersForceTierAllocation(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersWatermarkTierAllocation) { this->testMultiTiersWatermarkAllocation(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersSyncEviction) { this->testSyncEviction(); }
 
 } // end of namespace tests
 } // end of namespace cachelib
