Compressed pointer addressing in single and 2-tier mode #188
@@ -27,18 +27,29 @@ namespace cachelib {
 class SlabAllocator;

-// the following are for pointer compression for the memory allocator. We
-// compress pointers by storing the slab index and the alloc index of the
-// allocation inside the slab. With slab worth kNumSlabBits of data, if we
-// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
-// bits for storing the alloc index. This leaves the remaining (32 -
-// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
-// of memory in slabs and index anything more than 64 byte allocations inside
-// the slab using a 32 bit representation.
-//
 // This CompressedPtr makes decompression fast by staying away from division and
 // modulo arithmetic and doing those during the compression time. We most often
-// decompress a CompressedPtr than compress a pointer while creating one.
+// decompress a CompressedPtr than compress a pointer while creating one. This
+// is used for pointer compression by the memory allocator.
+
+// We compress pointers by storing the tier index, slab index and alloc index
+// of the allocation inside the slab.
+
+// In the original design (without memory tiers):
+// Each slab addresses 22 bits of allocations (kNumSlabBits). This is split into
+// the allocation index and the allocation size. With a min allocation size of
+// 64 bytes (kMinAllocPower = 6 bits), kNumSlabBits(22) - kMinAllocPower(6) =
+// 16 bits are used for storing the alloc index. This leaves the remaining
+// 32 - (kNumSlabBits - kMinAllocPower) = 16 bits for the slab index. Hence we
+// can index 256 GiB of memory.
+
+// In the multi-tier design:
+// kNumSlabBits and kMinAllocPower remain unchanged. The tier id occupies the
+// 32nd bit only, since its value cannot exceed kMaxTiers(2). This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) - 1 (tier id bit) = 15 bits for the
+// slab index. Hence we can index 128 GiB of memory per tier in the
+// multi-tier configuration.

 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
   using PtrType = uint32_t;
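As a quick sanity check on the arithmetic in the comment above, the following standalone sketch (not part of this PR) re-derives the 256 GiB and 128 GiB figures. The names mirror CacheLib's constants, but everything here is self-contained and assumes a 64-bit size_t.

// Re-derives the addressable-memory figures from the comment block above.
#include <cstddef>
#include <cstdint>

namespace sketch {
constexpr unsigned kNumSlabBits = 22;   // each slab holds 4 MiB
constexpr unsigned kMinAllocPower = 6;  // minimum allocation is 64 bytes
constexpr unsigned kNumPtrBits = 32;    // CompressedPtr is a uint32_t

// 22 - 6 = 16 bits identify an allocation inside a slab.
constexpr unsigned kNumAllocIdxBits = kNumSlabBits - kMinAllocPower;

// Single tier: the remaining 16 bits index slabs -> 2^16 * 4 MiB = 256 GiB.
constexpr unsigned kSlabIdxBitsSingleTier = kNumPtrBits - kNumAllocIdxBits;
static_assert(kSlabIdxBitsSingleTier == 16, "16 slab-index bits");
static_assert((std::size_t{1} << (kSlabIdxBitsSingleTier + kNumSlabBits)) ==
                  (std::size_t{256} << 30),
              "single-tier mode addresses 256 GiB");

// Multi tier: the MSB holds the tier id, 15 bits remain -> 128 GiB per tier.
constexpr unsigned kSlabIdxBitsMultiTier = kNumPtrBits - kNumAllocIdxBits - 1;
static_assert(kSlabIdxBitsMultiTier == 15, "15 slab-index bits");
static_assert((std::size_t{1} << (kSlabIdxBitsMultiTier + kNumSlabBits)) ==
                  (std::size_t{128} << 30),
              "multi-tier mode addresses 128 GiB per tier");
}  // namespace sketch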
@@ -62,9 +73,10 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
     return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
   }

-  // maximum adressable memory for pointer compression to work.
+  // maximum addressable memory for pointer compression to work.
   static constexpr size_t getMaxAddressableSize() noexcept {
-    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
+    return static_cast<size_t>(1)
+           << (numSlabIdxBits(false) + Slab::kNumSlabBits);
   }

   // default construct to nullptr.
@@ -89,8 +101,11 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   PtrType ptr_{kNull};

   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx)
-      : ptr_(compress(slabIdx, allocIdx)) {}
+  CompressedPtr(uint32_t slabIdx,
+                uint32_t allocIdx,
+                bool isMultiTiered,
+                TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}

   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
@@ -100,33 +115,63 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;

+  // Use the 32nd bit position for the TierId.
+  static constexpr unsigned int kNumTierIdxOffset = 31;
+
   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;

-  // Number of bits for the slab index. This will be the top 16 bits of the
-  // compressed ptr.
-  static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumAllocIdxBits;
+  // Mask for the tier id bit (the most significant bit of the compressed ptr).
+  static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
+
+  // Number of bits for the slab index.
+  // If CacheLib is single-tiered, the slab index will be the top 16 bits
+  // of the compressed ptr.
+  // If CacheLib is multi-tiered, the topmost 32nd bit will be reserved for
+  // the tier id and the following 15 bits will be reserved for the slab
+  // index.
+  static constexpr unsigned int numSlabIdxBits(bool isMultiTiered) {
+    return kNumTierIdxOffset - kNumAllocIdxBits + (!isMultiTiered);
+  }

   // Compress the given slabIdx and allocIdx into a 32-bit compressed
   // pointer.
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx) noexcept {
+  static PtrType compress(uint32_t slabIdx,
+                          uint32_t allocIdx,
+                          bool isMultiTiered,
+                          TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
-    XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
-    return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    XDCHECK_LT(slabIdx, (1u << numSlabIdxBits(isMultiTiered)) - 1);
+    if (!isMultiTiered) {
+      return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    }
+    return (static_cast<uint32_t>(tid) << kNumTierIdxOffset) +
+           (slabIdx << kNumAllocIdxBits) + allocIdx;
   }

   // Get the slab index of the compressed ptr
-  uint32_t getSlabIdx() const noexcept {
+  uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumAllocIdxBits);
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
+    return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }

   // Get the allocation index of the compressed ptr
   uint32_t getAllocIdx() const noexcept {
     XDCHECK(!isNull());
+    // Note: a tid check on ptr_ is not required since only
+    // the lower 16 bits are being read here.
     return static_cast<uint32_t>(ptr_ & kAllocIdxMask);
   }

+  uint32_t getTierId(bool isMultiTiered) const noexcept {
+    XDCHECK(!isNull());
+    return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
+  }
+
+  void setTierId(TierId tid) noexcept {
+    ptr_ += static_cast<uint32_t>(tid) << kNumTierIdxOffset;
+  }
+
   friend SlabAllocator;
 };

Review comment on lines +171 to +172 (setTierId):
What is the use of this? Is this needed for succeeding changes?

Reply:
Yes, it will be referred to in subsequent upstream PRs. For the complete picture, please refer to intel#56.
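For illustration, here is a self-contained sketch (not from the PR) of the bit packing that compress(), getSlabIdx(), getAllocIdx() and getTierId() above implement, written as free functions so the round trip can be checked in isolation. The constants follow the diff; representing the tier id as a plain uint32_t is an assumption made for brevity.

#include <cassert>
#include <cstdint>

namespace sketch {
constexpr unsigned kNumAllocIdxBits = 16;
constexpr unsigned kNumTierIdxOffset = 31;
constexpr std::uint32_t kAllocIdxMask = (1u << kNumAllocIdxBits) - 1;
constexpr std::uint32_t kTierIdxMask = 1u << kNumTierIdxOffset;

// Pack tier id (MSB, multi-tier only), slab index, and alloc index.
std::uint32_t compress(std::uint32_t slabIdx, std::uint32_t allocIdx,
                       bool isMultiTiered, std::uint32_t tid) {
  if (!isMultiTiered) {
    return (slabIdx << kNumAllocIdxBits) + allocIdx;
  }
  return (tid << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
}

// Drop the tier bit (if any) before shifting out the alloc-index bits.
std::uint32_t getSlabIdx(std::uint32_t ptr, bool isMultiTiered) {
  auto noTierIdPtr = isMultiTiered ? (ptr & ~kTierIdxMask) : ptr;
  return noTierIdPtr >> kNumAllocIdxBits;
}

// Only the lower 16 bits are read, so the tier bit never interferes.
std::uint32_t getAllocIdx(std::uint32_t ptr) { return ptr & kAllocIdxMask; }

std::uint32_t getTierId(std::uint32_t ptr, bool isMultiTiered) {
  return isMultiTiered ? (ptr >> kNumTierIdxOffset) : 0;
}
}  // namespace sketch

int main() {
  // Multi-tier round trip: tier 1, slab 12345 (fits in 15 bits), alloc 678.
  auto p = sketch::compress(12345, 678, /*isMultiTiered=*/true, /*tid=*/1);
  assert(sketch::getTierId(p, true) == 1);
  assert(sketch::getSlabIdx(p, true) == 12345);
  assert(sketch::getAllocIdx(p) == 678);
  return 0;
}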
@@ -137,11 +182,12 @@ class PtrCompressor {
       : allocator_(allocator) {}

   const CompressedPtr compress(const PtrType* uncompressed) const {
-    return allocator_.compress(uncompressed);
+    return allocator_.compress(uncompressed, false /* isMultiTiered */);
   }

   PtrType* unCompress(const CompressedPtr compressed) const {
-    return static_cast<PtrType*>(allocator_.unCompress(compressed));
+    return static_cast<PtrType*>(
+        allocator_.unCompress(compressed, false /* isMultiTiered */));
   }

   bool operator==(const PtrCompressor& rhs) const noexcept {
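A hedged usage sketch of the wrapper pattern above: MockAllocator, Item and ItemPtrCompressor are hypothetical stand-ins invented for this example; only the shape of compress()/unCompress() and the hard-coded false /* isMultiTiered */ mirror the diff, which is what keeps single-tier behavior unchanged for now.

#include <cassert>
#include <cstdint>

struct Item { int value; };

struct MockAllocator {
  // Stand-ins for the allocator's compress()/unCompress(); a real allocator
  // translates between raw pointers and 32-bit compressed values instead.
  std::uintptr_t compress(const Item* p, bool /*isMultiTiered*/) const {
    return reinterpret_cast<std::uintptr_t>(p);
  }
  void* unCompress(std::uintptr_t c, bool /*isMultiTiered*/) const {
    return reinterpret_cast<void*>(c);
  }
};

class ItemPtrCompressor {
 public:
  explicit ItemPtrCompressor(const MockAllocator& allocator)
      : allocator_(allocator) {}

  std::uintptr_t compress(const Item* uncompressed) const {
    // Mirrors the PR: the wrapper pins isMultiTiered to false for now.
    return allocator_.compress(uncompressed, false /* isMultiTiered */);
  }

  Item* unCompress(std::uintptr_t compressed) const {
    return static_cast<Item*>(
        allocator_.unCompress(compressed, false /* isMultiTiered */));
  }

 private:
  const MockAllocator& allocator_;
};

int main() {
  MockAllocator alloc;
  ItemPtrCompressor compressor(alloc);
  Item item{42};
  auto c = compressor.compress(&item);
  assert(compressor.unCompress(c)->value == 42);
  return 0;
}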
@@ -225,7 +225,8 @@ class SlabAllocator {
   // the corresponding memory allocator. trying to inline this just increases
   // the code size and does not move the needle on the benchmarks much.
   // Calling this with invalid input in optimized build is undefined behavior.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
     if (ptr == nullptr) {
       return CompressedPtr{};
     }
@@ -246,18 +247,23 @@ class SlabAllocator {
         static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
                               reinterpret_cast<const uint8_t*>(slab)) /
         allocSize;
-    return CompressedPtr{slabIndex, allocIdx};
+    return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
   }

Review comment on lines -249 to +250:
So, the tier id is not set here. How are you going to provide the tier id? I think you are going to have a separate [...]. What about the [...]?

Reply:
Please refer to commit intel@2704ac8#diff-a6542b6dbf2cfb5e03e82205ee960757ab2b50de7bc25085089a3cffba40ae87, which shows how the tier id is passed to the allocator.compress() methods. That change is due to be sent for upstream review soon.

   // uncompress the point and return the raw ptr. This function never throws
   // in optimized build and assumes that the caller is responsible for calling
   // it with a valid compressed pointer.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
+  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr,
+                                   bool isMultiTiered) const {
     if (ptr.isNull()) {
       return nullptr;
     }

-    const SlabIdx slabIndex = ptr.getSlabIdx();
+    /* TODO: isMultiTiered is set to false by default.
+       The multi-tiering flag will have no impact till the
+       rest of the multi-tiering changes are merged.
+    */
+    const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
     const uint32_t allocIdx = ptr.getAllocIdx();
     const Slab* slab = &slabMemoryStart_[slabIndex];

Review comment:
Can you add a comment/TODO here just to clarify that multi-tier has no effect here in accessing slab memory, since we haven't incorporated the actual multi-tier logic yet?

Reply:
Done.
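To make the compress/unCompress address arithmetic concrete, here is a standalone sketch (not part of the PR) using plain byte arrays instead of the real Slab and SlabAllocator types. The 4 MiB slab size follows kNumSlabBits = 22; slabMemoryStart and allocSize are stand-ins for per-slab state the real allocator looks up.

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kSlabSize = std::size_t{1} << 22;  // 4 MiB per slab

// compress(): the slab index comes from the pointer's offset in the slab
// region, the alloc index from dividing the intra-slab offset by allocSize.
void compressSketch(const std::uint8_t* slabMemoryStart,
                    const std::uint8_t* ptr, std::uint32_t allocSize,
                    std::uint32_t& slabIndex, std::uint32_t& allocIdx) {
  const std::size_t byteOffset =
      static_cast<std::size_t>(ptr - slabMemoryStart);
  slabIndex = static_cast<std::uint32_t>(byteOffset / kSlabSize);
  allocIdx = static_cast<std::uint32_t>((byteOffset % kSlabSize) / allocSize);
}

// unCompress(): no division or modulo, only a multiply and adds, which is
// what makes decompression cheap.
const std::uint8_t* unCompressSketch(const std::uint8_t* slabMemoryStart,
                                     std::uint32_t slabIndex,
                                     std::uint32_t allocIdx,
                                     std::uint32_t allocSize) {
  return slabMemoryStart + slabIndex * kSlabSize +
         static_cast<std::size_t>(allocIdx) * allocSize;
}

int main() {
  alignas(64) static std::uint8_t region[2 * kSlabSize];  // two fake slabs
  const std::uint32_t allocSize = 64;
  const std::uint8_t* p = region + kSlabSize + 5 * allocSize;  // slab 1, alloc 5
  std::uint32_t slabIndex = 0, allocIdx = 0;
  compressSketch(region, p, allocSize, slabIndex, allocIdx);
  assert(slabIndex == 1 && allocIdx == 5);
  assert(unCompressSketch(region, slabIndex, allocIdx, allocSize) == p);
  return 0;
}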
Review comment:
isMultiTiered looks redundant. We are storing the tid at the MSB of the compressed ptr. In this case, we can just interpret isMultiTiered = false if tid == 0. The additional logic in CompressedPtr can be simplified a lot in this way.

Reply:
The bit-packing format changes for the multi-tier compressed pointer. The 32nd bit is reserved for the tid only in the multi-tiered compressed pointer. When the config is single-tiered, the compressed pointer bit packing uses the original design: the 32nd bit is not reserved for the tid. tid == 0 means cachelib may or may not be multi-tiered.
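To make the point in the reply above concrete: under the single-tier layout the 32nd bit belongs to the slab index, so a compressed pointer with a large slab index sets that bit even though there is no tier id. The small standalone example below (not part of the PR) shows the same 32-bit value decoding differently under the two layouts, which is why tid == 0 cannot be used to infer single-tier mode. Constants follow the diff (16 alloc-index bits, MSB as tier id when multi-tiered).

#include <cassert>
#include <cstdint>

int main() {
  constexpr unsigned kNumAllocIdxBits = 16;
  constexpr unsigned kNumTierIdxOffset = 31;

  // Single-tier encoding of slab 40000 (which needs all 16 slab-index bits,
  // so the MSB of the compressed value is set) and alloc index 7.
  const std::uint32_t ptr = (40000u << kNumAllocIdxBits) + 7u;

  // Decoded with the single-tier layout: slab 40000, alloc 7.
  assert((ptr >> kNumAllocIdxBits) == 40000u);

  // Naively decoded with the multi-tier layout, the same bits would read as
  // tier 1 and slab 40000 - 32768 = 7232 -- a different allocation entirely.
  assert((ptr >> kNumTierIdxOffset) == 1u);
  assert(((ptr & ~(1u << kNumTierIdxOffset)) >> kNumAllocIdxBits) == 7232u);
  return 0;
}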