
Commit 3181c63

tier admission
item movement test updated
class in test
use transparent sync for item movement
remove extra whitespace
updates per comments
updates per comments (still outstanding on the todos)
1 parent 829a434 commit 3181c63

19 files changed: +356 -32 lines changed

cachelib/allocator/Cache.h

+2 -1

@@ -93,10 +93,11 @@ class CacheBase {
   virtual bool isObjectCache() const = 0;
 
   // Get the reference to a memory pool, for stats purposes
+  // uses the top most memory tier by default
   //
   // @param poolId The pool id to query
   virtual const MemoryPool& getPool(PoolId poolId) const = 0;
-
+
   // Get Pool specific stats (regular pools). This includes stats from the
   // Memory Pool and also the cache.
   //

cachelib/allocator/CacheAllocator-inl.h

+88 -10
@@ -405,9 +405,22 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
   // TODO: per-tier
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_[tid]->allocate(pid, requiredSize);
+  void* memory = nullptr;
+
+  if (tid == 0 && config_.acTopTierEvictionWatermark > 0.0
+      && getAllocationClassStats(tid, pid, cid)
+                 .approxFreePercent < config_.acTopTierEvictionWatermark) {
+    memory = findEviction(tid, pid, cid);
+  }
+
+  if (memory == nullptr) {
+    // TODO: should we try to allocate the item even if this will result in
+    // violating acTopTierEvictionWatermark?
+    memory = allocator_[tid]->allocate(pid, requiredSize);
+  }
 
   // TODO: Today disableEviction means do not evict from memory (DRAM).
-  // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)?
+  // Should we support eviction between memory tiers (e.g. from DRAM to the next tier)?
   if (memory == nullptr && !config_.isEvictionDisabled()) {
     memory = findEviction(tid, pid, cid);
   }
@@ -448,19 +461,71 @@ CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
   return handle;
 }
 
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTargetTierForItem(PoolId pid,
+                                                 typename Item::Key key,
+                                                 uint32_t size,
+                                                 uint32_t creationTime,
+                                                 uint32_t expiryTime) {
+  if (getNumTiers() == 1)
+    return 0;
+
+  if (config_.forceAllocationTier != UINT64_MAX) {
+    return config_.forceAllocationTier;
+  }
+
+  const TierId defaultTargetTier = 0;
+
+  const auto requiredSize = Item::getRequiredSize(key, size);
+  const auto cid = allocator_[defaultTargetTier]->getAllocationClassId(pid, requiredSize);
+
+  auto freePercentage = getAllocationClassStats(defaultTargetTier, pid, cid).approxFreePercent;
+
+  // TODO: could we implement a BG worker which would move slabs around
+  // so that there is a similar amount of free space in each pool/AC?
+  // Should this be the responsibility of the BG evictor?
+
+  if (freePercentage >= config_.maxAcAllocationWatermark)
+    return defaultTargetTier;
+
+  if (freePercentage <= config_.minAcAllocationWatermark)
+    return defaultTargetTier + 1;
+
+  // TODO: we could even create different allocation classes for different tiers
+  // and look at possible fragmentation when deciding where to put the item
+  if (config_.sizeThresholdPolicy)
+    return requiredSize < config_.sizeThresholdPolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO: (e.g. always put chained items to PMEM)
+  // if (chainedItemsPolicy)
+  //   return item.isChainedItem() ? defaultTargetTier + 1 : defaultTargetTier;
+
+  // TODO:
+  // if (expiryTimePolicy)
+  //   return (expiryTime - creationTime) < expiryTimePolicy ? defaultTargetTier : defaultTargetTier + 1;
+
+  // TODO:
+  // if (keyPolicy) // this can be based on key length or some other properties
+  //   return getTargetTierForKey(key);
+
+  // TODO:
+  // if (compressibilityPolicy) // if it compresses well, store it in PMEM? latency will be higher anyway
+  //   return TODO;
+
+  // TODO: this only works for 2 tiers
+  return (folly::Random::rand32() % 100) < config_.defaultTierChancePercentage
+             ? defaultTargetTier
+             : defaultTargetTier + 1;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
                                              uint32_t expiryTime) {
-  auto tid = 0; /* TODO: consult admission policy */
-  for (TierId tid = 0; tid < getNumTiers(); ++tid) {
-    auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
-    if (handle) return handle;
-  }
-  return {};
+  auto tid = getTargetTierForItem(pid, key, size, creationTime, expiryTime);
+  return allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
 }
 
 template <typename CacheTrait>
@@ -1636,6 +1701,13 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
   return true;
 }
 
+template <typename CacheTrait>
+bool CacheAllocator<CacheTrait>::shouldEvictToNextMemoryTier(
+    TierId sourceTierId, TierId targetTierId, PoolId pid, Item& item) {
+  return !config_.disableEvictionToMemory;
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::WriteHandle
 CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
@@ -1649,8 +1721,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
     if (handle) { return handle; }
   }
 
-  TierId nextTier = tid; // TODO - calculate this based on some admission policy
+  TierId nextTier = tid;
   while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers
+    if (!shouldEvictToNextMemoryTier(tid, nextTier, pid, item))
+      continue;
     // allocateInternal might trigger another eviction
     auto newItemHdl = allocateInternalTier(nextTier, pid,
                                            item.getKey(),
@@ -2431,6 +2505,10 @@ void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
                                 .getAllocsPerSlab()
                           : 0);
   for (TierId tid = 0; tid < getNumTiers(); tid++) {
+    if constexpr (std::is_same_v<MMConfig, MMLru::Config> || std::is_same_v<MMConfig, MM2Q::Config>) {
+      config.lruInsertionPointSpec = config_.memoryTierConfigs[tid].lruInsertionPointSpec;
+      config.markUsefulChance = config_.memoryTierConfigs[tid].markUsefulChance;
+    }
     mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
   }
 }
@@ -2485,7 +2563,7 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
   folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_);
   // TODO - get rid of the duplication - right now, each tier
   // holds pool objects with mostly the same info
-  return filterCompactCachePools(allocator_[0]->getPoolIds());
+  return filterCompactCachePools(allocator_[currentTier()]->getPoolIds());
 }
 
 template <typename CacheTrait>

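The policies in the new getTargetTierForItem() apply in a fixed order: forced tier, then the two per-AC free-space watermarks, then the optional size threshold, and finally a random split. A minimal, self-contained sketch of that decision order for a two-tier setup (hypothetical names and inputs, not the committed code):

#include <cstdint>
#include <random>

// Hypothetical stand-in for the knobs read from CacheAllocatorConfig and for
// the per-allocation-class free percentage; tier 0 is the top tier.
struct AdmissionKnobs {
  uint64_t forceAllocationTier = UINT64_MAX;  // UINT64_MAX means "not forced"
  double maxAcAllocationWatermark = 0.0;      // free% >= this: stay in tier 0
  double minAcAllocationWatermark = 0.0;      // free% <= this: spill to tier 1
  uint64_t sizeThresholdPolicy = 0;           // 0 disables the size policy
  double defaultTierChancePercentage = 50.0;  // fallback random split
};

int pickTier(const AdmissionKnobs& k, double acFreePercent, uint32_t requiredSize) {
  if (k.forceAllocationTier != UINT64_MAX)
    return static_cast<int>(k.forceAllocationTier);
  if (acFreePercent >= k.maxAcAllocationWatermark)
    return 0;
  if (acFreePercent <= k.minAcAllocationWatermark)
    return 1;
  if (k.sizeThresholdPolicy)
    return requiredSize < k.sizeThresholdPolicy ? 0 : 1;
  static std::mt19937 gen{std::random_device{}()};
  return std::uniform_int_distribution<int>(0, 99)(gen) < k.defaultTierChancePercentage ? 0 : 1;
}

Note that with the default knobs (both watermarks at 0.0), every allocation still lands in tier 0, so the default behavior is unchanged.
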
cachelib/allocator/CacheAllocator.h

+8

@@ -1287,6 +1287,11 @@ class CacheAllocator : public CacheBase {
   // @param types the type of the memory used
   // @param config the configuration for the whole cache allocator
   CacheAllocator(InitMemType types, Config config);
+
+  TierId getTargetTierForItem(PoolId pid, typename Item::Key key,
+                              uint32_t size,
+                              uint32_t creationTime,
+                              uint32_t expiryTime);
 
   // This is the last step in item release. We also use this for the eviction
   // scenario where we have to do everything, but not release the allocation
@@ -1678,6 +1683,9 @@ class CacheAllocator : public CacheBase {
   // handle to the item. On failure an empty handle.
   WriteHandle tryEvictToNextMemoryTier(Item& item);
 
+  bool shouldEvictToNextMemoryTier(TierId sourceTierId,
+                                   TierId targetTierId, PoolId pid, Item& item);
+
   size_t memoryTierSize(TierId tid) const;
 
   // Deserializer CacheAllocatorMetadata and verify the version

cachelib/allocator/CacheAllocatorConfig.h

+8

@@ -603,6 +603,14 @@
   // If true, we will delay worker start until user explicitly calls
   // CacheAllocator::startCacheWorkers()
   bool delayCacheWorkersStart{false};
+  bool disableEvictionToMemory{false};
+
+  double minAcAllocationWatermark{0.0};  // if % free in the AC is <= this, try to allocate in the next tier
+  double maxAcAllocationWatermark{0.0};  // if % free in the AC is >= this, allocate in the default target tier
+  double acTopTierEvictionWatermark{0.0};  // evict from the 1st tier if % free is < this watermark
+  uint64_t sizeThresholdPolicy{0};  // only allow items < this threshold in the top tier
+  double defaultTierChancePercentage{50.0};  // otherwise, randomly allocate items among top and bottom tiers
+  uint64_t forceAllocationTier{UINT64_MAX};  // force allocations to happen in this tier
 
   friend CacheT;

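Since these are plain public fields (like delayCacheWorkersStart just above), they would presumably be set directly on the config object. A hedged, illustrative sketch with made-up values, assuming the usual LruAllocator::Config alias:

// Illustrative only: keep new items in tier 0 while its allocation classes are
// at least 10% free, spill to tier 1 once they fall to 5% or below, start
// watermark-driven tier-0 eviction under 2% free, and route items of 2KB or
// larger to the lower tier.
LruAllocator::Config config;
config.maxAcAllocationWatermark = 10.0;
config.minAcAllocationWatermark = 5.0;
config.acTopTierEvictionWatermark = 2.0;
config.sizeThresholdPolicy = 2048;
config.defaultTierChancePercentage = 75.0;  // fallback: 75% chance of tier 0
// config.forceAllocationTier = 1;          // or pin every allocation to one tier
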
cachelib/allocator/MM2Q-inl.h

+28 -5

@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <folly/Random.h>
+
 namespace facebook {
 namespace cachelib {
 
@@ -104,6 +106,10 @@ bool MM2Q::Container<T, HookPtr>::recordAccess(T& node,
       return false;
     }
 
+    // TODO: % 100 is not very accurate
+    if (config_.markUsefulChance < 100.0 &&
+        folly::Random::rand32() % 100 >= config_.markUsefulChance)
+      return false;
+
     return lruMutex_->lock_combine(func);
   }
   return false;
@@ -223,15 +229,32 @@ void MM2Q::Container<T, HookPtr>::rebalance() noexcept {
 template <typename T, MM2Q::Hook<T> T::*HookPtr>
 bool MM2Q::Container<T, HookPtr>::add(T& node) noexcept {
   const auto currTime = static_cast<Time>(util::getCurrentTimeSec());
-  return lruMutex_->lock_combine([this, &node, currTime]() {
+
+  auto insertToList = [this, &node] {
+    if (config_.lruInsertionPointSpec == 0) {
+      markHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Hot).linkAtHead(node);
+    } else if (config_.lruInsertionPointSpec == 1) {
+      unmarkHot(node);
+      unmarkCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Warm).linkAtHead(node);
+    } else {
+      unmarkHot(node);
+      markCold(node);
+      unmarkTail(node);
+      lru_.getList(LruType::Cold).linkAtHead(node);
+    }
+  };
+
+  return lruMutex_->lock_combine([this, &node, currTime, &insertToList]() {
    if (node.isInMMContainer()) {
      return false;
    }
 
-    markHot(node);
-    unmarkCold(node);
-    unmarkTail(node);
-    lru_.getList(LruType::Hot).linkAtHead(node);
+    insertToList();
    rebalance();
 
    node.markInMMContainer();

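The markUsefulChance check above gates LRU promotion probabilistically: an access updates the container with probability markUsefulChance / 100, so the default of 100.0 preserves today's behavior. A minimal sketch of that gate as a standalone helper (hypothetical, not part of the commit):

#include <folly/Random.h>

// Returns true when an access should be recorded in the MMContainer,
// mirroring the MM2Q check above.
inline bool shouldRecordAccess(double markUsefulChance) {
  return markUsefulChance >= 100.0 ||
         folly::Random::rand32() % 100 < markUsefulChance;
}

As the TODO notes, rand32() % 100 is slightly biased toward low values; a uniform helper such as folly::Random::randDouble01() could avoid that if the precision ever matters.
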
cachelib/allocator/MM2Q.h

+2

@@ -306,6 +306,8 @@ class MM2Q {
     // Minimum interval between reconfigurations. If 0, reconfigure is never
     // called.
     std::chrono::seconds mmReconfigureIntervalSecs{};
+    double markUsefulChance{100.0};
+    uint8_t lruInsertionPointSpec{0};
   };
 
   // The container object which can be used to keep track of objects of type

cachelib/allocator/MMLru-inl.h

+6

@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <folly/Random.h>
+
 namespace facebook {
 namespace cachelib {
 namespace detail {
@@ -87,6 +89,10 @@ bool MMLru::Container<T, HookPtr>::recordAccess(T& node,
       return false;
     }
 
+    // TODO: % 100 is not very accurate
+    if (config_.markUsefulChance < 100.0 &&
+        folly::Random::rand32() % 100 >= config_.markUsefulChance)
+      return false;
+
     lruMutex_->lock_combine(func);
     return true;
   }

cachelib/allocator/MMLru.h

+2

@@ -189,6 +189,8 @@ class MMLru {
     // access. If set, and tryLock fails, access will not result in promotion.
     bool tryLockUpdate{false};
 
+    double markUsefulChance{100.0};
+
     // By default insertions happen at the head of the LRU. If we need
     // insertions at the middle of lru we can adjust this to be a non-zero.
     // Ex: lruInsertionPointSpec = 1, we insert at the middle (1/2 from end)

cachelib/allocator/MMTinyLFU-inl.h

-1

@@ -228,7 +228,6 @@ MMTinyLFU::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
   fun(Iterator{LockHolder{}, *this});
 }
 
-
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
 void MMTinyLFU::Container<T, HookPtr>::removeLocked(T& node) noexcept {
   if (isTiny(node)) {

cachelib/allocator/MemoryTierCacheConfig.h

+4

@@ -81,6 +81,10 @@
 
   const ShmTypeOpts& getShmTypeOpts() const noexcept { return shmOpts; }
 
+  // TODO: move these to the MMContainer config
+  double markUsefulChance{100.0};   // call markUseful only with this chance
+  uint8_t lruInsertionPointSpec{0}; // see the LRU/2Q description (possible values vary)
+
  private:
   // Ratio is a number of parts of the total cache size to be allocated for this
   // tier. E.g. if X is a total cache size, Yi are ratios specified for memory

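A sketch of per-tier usage, assuming the multi-tier fork's MemoryTierCacheConfig::fromShm()/setRatio() builders and CacheAllocatorConfig::configureMemoryTiers() (config here is the CacheAllocatorConfig from the earlier sketch); the new fields are public members, so they are set directly, and the values are illustrative:

auto tier0 = MemoryTierCacheConfig::fromShm().setRatio(1);  // top tier: defaults
auto tier1 = MemoryTierCacheConfig::fromShm().setRatio(1);  // lower tier
tier1.markUsefulChance = 10.0;    // record only ~10% of accesses in the lower tier
tier1.lruInsertionPointSpec = 1;  // insert lower-tier items mid-LRU (Warm for 2Q)
config.configureMemoryTiers({tier0, tier1});
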
cachelib/allocator/memory/MemoryAllocator.h

+2 -1

@@ -660,7 +660,8 @@
            && ptr < slabAllocator_.getSlabMemoryEnd();
   }
 
- private:
+  // TODO:
+  // private:
   // @param memory pointer to the memory.
   // @return the MemoryPool corresponding to the memory.
   // @throw std::invalid_argument if the memory does not belong to any active

cachelib/allocator/memory/MemoryAllocatorStats.h

+4

@@ -54,6 +54,10 @@ struct ACStats {
   constexpr size_t getTotalFreeMemory() const noexcept {
     return Slab::kSize * freeSlabs + freeAllocs * allocSize;
   }
+
+  constexpr size_t getTotalMemory() const noexcept {
+    return activeAllocs * allocSize;
+  }
 };
 
 // structure to query stats corresponding to a MemoryPool

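getTotalMemory() counts only active allocations (activeAllocs * allocSize); paired with getTotalFreeMemory() it can back a rough utilization figure. A small hypothetical helper, not part of the commit:

// Approximate fraction of an allocation class' memory that is in active use,
// built from the two ACStats accessors above; returns 0 for an empty class.
inline double approxUsedFraction(const ACStats& stats) {
  const double total =
      static_cast<double>(stats.getTotalMemory() + stats.getTotalFreeMemory());
  return total > 0.0 ? static_cast<double>(stats.getTotalMemory()) / total : 0.0;
}
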
cachelib/allocator/memory/MemoryPool.h

+2 -1

@@ -308,7 +308,8 @@
   // @param value new value for the curSlabsAdvised_
   void setNumSlabsAdvised(uint64_t value) { curSlabsAdvised_ = value; }
 
- private:
+  // TODO:
+  // private:
   // container for storing a vector of AllocationClass.
   using ACVector = std::vector<std::unique_ptr<AllocationClass>>;

cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp

+3

@@ -28,6 +28,9 @@ TEST_F(LruAllocatorMemoryTiersTest, MultiTiersFromFileValid) { this->testMultiTi
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsSysVValid) { this->testMultiTiersNumaBindingsSysVValid(); }
 TEST_F(LruAllocatorMemoryTiersTest, MultiTiersNumaBindingsPosixValid) { this->testMultiTiersNumaBindingsPosixValid(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersForceTierAllocation) { this->testMultiTiersForceTierAllocation(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersWatermarkTierAllocation) { this->testMultiTiersWatermarkAllocation(); }
+TEST_F(LruAllocatorMemoryTiersTest, MultiTiersSyncEviction) { this->testSyncEviction(); }
 
 } // end of namespace tests
 } // end of namespace cachelib
