diff --git a/cachelib/allocator/CCacheAllocator.cpp b/cachelib/allocator/CCacheAllocator.cpp
index 2709bde377..dd1986114b 100644
--- a/cachelib/allocator/CCacheAllocator.cpp
+++ b/cachelib/allocator/CCacheAllocator.cpp
@@ -36,7 +36,9 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator,
       currentChunksIndex_(0) {
   auto& currentChunks = chunks_[currentChunksIndex_];
   for (auto chunk : *object.chunks()) {
-    currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk)));
+    // TODO: pass the multi-tier flag when compact cache supports multi-tier config
+    currentChunks.push_back(
+        allocator_.unCompress(CompressedPtr(chunk), false /* isMultiTier */));
   }
 }
 
@@ -97,7 +99,9 @@ CCacheAllocator::SerializationType CCacheAllocator::saveState() {
   std::lock_guard<std::mutex> guard(resizeLock_);
 
   for (auto chunk : getCurrentChunks()) {
-    object.chunks()->push_back(allocator_.compress(chunk).saveState());
+    // TODO: pass the multi-tier flag when compact cache supports multi-tier config
+    object.chunks()->push_back(
+        allocator_.compress(chunk, false /* isMultiTier */).saveState());
   }
   return object;
 }
diff --git a/cachelib/allocator/memory/CompressedPtr.h b/cachelib/allocator/memory/CompressedPtr.h
index 96d39ae2b9..029abd91b9 100644
--- a/cachelib/allocator/memory/CompressedPtr.h
+++ b/cachelib/allocator/memory/CompressedPtr.h
@@ -27,18 +27,29 @@ namespace cachelib {
 
 class SlabAllocator;
 
-// the following are for pointer compression for the memory allocator. We
-// compress pointers by storing the slab index and the alloc index of the
-// allocation inside the slab. With slab worth kNumSlabBits of data, if we
-// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
-// bits for storing the alloc index. This leaves the remaining (32 -
-// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
-// of memory in slabs and index anything more than 64 byte allocations inside
-// the slab using a 32 bit representation.
-//
 // This CompressedPtr makes decompression fast by staying away from division and
 // modulo arithmetic and doing those during the compression time. We most often
-// decompress a CompressedPtr than compress a pointer while creating one.
+// decompress a CompressedPtr than compress a pointer while creating one. This
+// is used for pointer compression by the memory allocator.
+
+// We compress pointers by storing the tier index, slab index and alloc index
+// of the allocation inside the slab.
+
+// In the original design (without memory tiers):
+// Each slab addresses 22 bits of allocations (kNumSlabBits), split between
+// the allocation size and the allocation index. With a minimum allocation
+// size of 64 bytes (kMinAllocPower = 6 bits), the remaining kNumSlabBits(22) -
+// kMinAllocPower(6) = 16 bits store the alloc index. This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) = 16 bits for the slab index. Hence we
+// can index 256 GiB of memory.
+
+// In the multi-tier design:
+// kNumSlabBits and kMinAllocPower remain unchanged. The tier id occupies only
+// the 32nd bit, since its value cannot exceed kMaxTiers(2). This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) - 1 (tier id bit) = 15 bits for the
+// slab index. Hence we can index 128 GiB of memory per tier in a multi-tier
+// configuration.
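+//
+// To summarize the layout: bits 0-15 hold the allocation index. In a
+// single-tier configuration, bits 16-31 hold the slab index; in a multi-tier
+// configuration, bits 16-30 hold the slab index and bit 31 holds the tier id.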
+
 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
   using PtrType = uint32_t;
@@ -62,9 +73,10 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
     return static_cast<size_t>(1) << (Slab::kMinAllocPower);
   }
 
-  // maximum adressable memory for pointer compression to work.
+  // maximum addressable memory for pointer compression to work.
   static constexpr size_t getMaxAddressableSize() noexcept {
-    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
+    return static_cast<size_t>(1)
+           << (numSlabIdxBits(false) + Slab::kNumSlabBits);
   }
 
   // default construct to nullptr.
@@ -89,8 +101,11 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   PtrType ptr_{kNull};
 
   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx)
-      : ptr_(compress(slabIdx, allocIdx)) {}
+  CompressedPtr(uint32_t slabIdx,
+                uint32_t allocIdx,
+                bool isMultiTiered,
+                TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}
 
   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
 
@@ -100,33 +115,63 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;
 
+  // Use the 32nd bit position for the tier id
+  static constexpr unsigned int kNumTierIdxOffset = 31;
+
   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;
 
-  // Number of bits for the slab index. This will be the top 16 bits of the
-  // compressed ptr.
-  static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumAllocIdxBits;
+  // Mask for the most significant bit, which stores the tier id
+  static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
+
+  // Number of bits for the slab index.
+  // If CacheLib is single-tiered, the slab index occupies the top 16 bits
+  // of the compressed ptr.
+  // If CacheLib is multi-tiered, the topmost (32nd) bit is reserved for
+  // the tier id, and the following 15 bits are reserved for the slab
+  // index.
+  static constexpr unsigned int numSlabIdxBits(bool isMultiTiered) {
+    return kNumTierIdxOffset - kNumAllocIdxBits + (!isMultiTiered);
+  }
 
   // Compress the given slabIdx and allocIdx into a 32-bit compressed
   // pointer.
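+  // For example, in a multi-tier setup (isMultiTiered == true), tid = 1,
+  // slabIdx = 3 and allocIdx = 5 compress to (1u << 31) | (3u << 16) | 5.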
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx) noexcept {
+  static PtrType compress(uint32_t slabIdx,
+                          uint32_t allocIdx,
+                          bool isMultiTiered,
+                          TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
-    XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
-    return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    XDCHECK_LT(slabIdx, (1u << numSlabIdxBits(isMultiTiered)) - 1);
+    if (!isMultiTiered) {
+      return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    }
+    return (static_cast<uint32_t>(tid) << kNumTierIdxOffset) +
+           (slabIdx << kNumAllocIdxBits) + allocIdx;
   }
 
   // Get the slab index of the compressed ptr
-  uint32_t getSlabIdx() const noexcept {
+  uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumAllocIdxBits);
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
+    return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }
 
   // Get the allocation index of the compressed ptr
   uint32_t getAllocIdx() const noexcept {
     XDCHECK(!isNull());
+    // Note: a tier id check on ptr_ is not required here since only
+    // the lower 16 bits are being read.
     return static_cast<uint32_t>(ptr_ & kAllocIdxMask);
   }
 
+  uint32_t getTierId(bool isMultiTiered) const noexcept {
+    XDCHECK(!isNull());
+    return isMultiTiered ?
+        static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
+  }
+
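+  // Adds the tier id into the top bit; assumes that bit is currently unset
+  // (tid can only be 0 or 1, since a single bit is reserved for it).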
+  void setTierId(TierId tid) noexcept {
+    ptr_ += static_cast<uint32_t>(tid) << kNumTierIdxOffset;
+  }
+
   friend SlabAllocator;
 };
 
@@ -137,11 +182,12 @@ class PtrCompressor {
       : allocator_(allocator) {}
 
   const CompressedPtr compress(const PtrType* uncompressed) const {
-    return allocator_.compress(uncompressed);
+    return allocator_.compress(uncompressed, false /* isMultiTiered */);
   }
 
   PtrType* unCompress(const CompressedPtr compressed) const {
-    return static_cast<PtrType*>(allocator_.unCompress(compressed));
+    return static_cast<PtrType*>(
+        allocator_.unCompress(compressed, false /* isMultiTiered */));
   }
 
   bool operator==(const PtrCompressor& rhs) const noexcept {
diff --git a/cachelib/allocator/memory/MemoryAllocator.h b/cachelib/allocator/memory/MemoryAllocator.h
index 509664afa6..1ce58857de 100644
--- a/cachelib/allocator/memory/MemoryAllocator.h
+++ b/cachelib/allocator/memory/MemoryAllocator.h
@@ -534,8 +534,9 @@ class MemoryAllocator {
   // as the original pointer is valid.
   //
   // @throw std::invalid_argument if the ptr is invalid.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
-    return slabAllocator_.compress(ptr);
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
+    return slabAllocator_.compress(ptr, isMultiTiered);
   }
 
   // retrieve the raw pointer corresponding to the compressed pointer. This is
@@ -546,8 +547,9 @@ class MemoryAllocator {
   // @return the raw pointer corresponding to this compressed pointer.
   //
   // @throw std::invalid_argument if the compressed pointer is invalid.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr) const {
-    return slabAllocator_.unCompress(cPtr);
+  void* CACHELIB_INLINE unCompress(const CompressedPtr cPtr,
+                                   bool isMultiTiered) const {
+    return slabAllocator_.unCompress(cPtr, isMultiTiered);
   }
 
   // a special implementation of pointer compression for benchmarking purposes.
diff --git a/cachelib/allocator/memory/Slab.h b/cachelib/allocator/memory/Slab.h
index 4784bee8e9..897ad4e349 100644
--- a/cachelib/allocator/memory/Slab.h
+++ b/cachelib/allocator/memory/Slab.h
@@ -50,6 +50,8 @@ namespace cachelib {
 * independantly by the SlabAllocator.
 */
 
+// identifier for the memory tier
+using TierId = int8_t;
 // identifier for the memory pool
 using PoolId = int8_t;
 // identifier for the allocation class
diff --git a/cachelib/allocator/memory/SlabAllocator.cpp b/cachelib/allocator/memory/SlabAllocator.cpp
index ade5a8e535..d840422a1a 100644
--- a/cachelib/allocator/memory/SlabAllocator.cpp
+++ b/cachelib/allocator/memory/SlabAllocator.cpp
@@ -48,7 +48,6 @@ using PtrType = CompressedPtr::PtrType;
 constexpr uint64_t SlabAllocator::kAddressMask;
 constexpr PtrType CompressedPtr::kAllocIdxMask;
 constexpr unsigned int CompressedPtr::kNumAllocIdxBits;
-constexpr unsigned int CompressedPtr::kNumSlabIdxBits;
 constexpr unsigned int SlabAllocator::kLockSleepMS;
 constexpr size_t SlabAllocator::kPagesPerStep;
 
diff --git a/cachelib/allocator/memory/SlabAllocator.h b/cachelib/allocator/memory/SlabAllocator.h
index 5f5bf3265a..d82cf5b947 100644
--- a/cachelib/allocator/memory/SlabAllocator.h
+++ b/cachelib/allocator/memory/SlabAllocator.h
@@ -225,7 +225,8 @@ class SlabAllocator {
   // the corresponding memory allocator. trying to inline this just increases
   // the code size and does not move the needle on the benchmarks much.
   // Calling this with invalid input in optimized build is undefined behavior.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
     if (ptr == nullptr) {
       return CompressedPtr{};
     }
@@ -246,18 +247,23 @@
         static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
                               reinterpret_cast<const uint8_t*>(slab)) /
         allocSize;
-    return CompressedPtr{slabIndex, allocIdx};
+    return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
   }
 
   // uncompress the point and return the raw ptr. This function never throws
   // in optimized build and assumes that the caller is responsible for calling
   // it with a valid compressed pointer.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
+  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr,
+                                   bool isMultiTiered) const {
     if (ptr.isNull()) {
       return nullptr;
     }
 
-    const SlabIdx slabIndex = ptr.getSlabIdx();
+    /* TODO: isMultiTiered is set to false by default.
+       The multi-tiering flag will have no impact until the
+       rest of the multi-tiering changes are merged.
+    */
+    const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
     const uint32_t allocIdx = ptr.getAllocIdx();
 
     const Slab* slab = &slabMemoryStart_[slabIndex];
diff --git a/cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp b/cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp
index cbd8cbef17..a19b9749a6 100644
--- a/cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp
+++ b/cachelib/allocator/memory/tests/MemoryAllocatorTest.cpp
@@ -401,13 +401,28 @@ TEST_F(MemoryAllocatorTest, PointerCompression) {
   for (const auto& pool : poolAllocs) {
     const auto& allocs = pool.second;
     for (const auto* alloc : allocs) {
-      CompressedPtr ptr = m.compress(alloc);
+      CompressedPtr ptr = m.compress(alloc, false /* isMultiTiered */);
       ASSERT_FALSE(ptr.isNull());
-      ASSERT_EQ(alloc, m.unCompress(ptr));
+      ASSERT_EQ(alloc, m.unCompress(ptr, false /* isMultiTiered */));
     }
   }
 
-  ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr)));
+  ASSERT_EQ(nullptr,
+            m.unCompress(m.compress(nullptr, false /* isMultiTiered */),
+                         false /* isMultiTiered */));
+
+  // test pointer compression with multi-tier
+  for (const auto& pool : poolAllocs) {
+    const auto& allocs = pool.second;
+    for (const auto* alloc : allocs) {
+      CompressedPtr ptr = m.compress(alloc, true /* isMultiTiered */);
+      ASSERT_FALSE(ptr.isNull());
+      ASSERT_EQ(alloc, m.unCompress(ptr, true /* isMultiTiered */));
+    }
+  }
+
+  ASSERT_EQ(nullptr, m.unCompress(m.compress(nullptr, true /* isMultiTiered */),
+                                  true /* isMultiTiered */));
 }
 
 TEST_F(MemoryAllocatorTest, Restorable) {
diff --git a/cachelib/benchmarks/PtrCompressionBench.cpp b/cachelib/benchmarks/PtrCompressionBench.cpp
index aeaa2c3b11..5daefc146f 100644
--- a/cachelib/benchmarks/PtrCompressionBench.cpp
+++ b/cachelib/benchmarks/PtrCompressionBench.cpp
@@ -61,7 +61,8 @@ void buildAllocs(size_t poolSize) {
       void* alloc = ma->allocate(pid, size);
       XDCHECK_GE(size, CompressedPtr::getMinAllocSize());
       if (alloc != nullptr) {
-        validAllocs.push_back({alloc, ma->compress(alloc)});
+        validAllocs.push_back(
+            {alloc, ma->compress(alloc, false /* isMultiTiered */)});
         validAllocsAlt.push_back({alloc, ma->compressAlt(alloc)});
         numAllocations++;
       }
@@ -83,7 +84,7 @@ BENCHMARK(CompressionAlt) {
 
 BENCHMARK_RELATIVE(Compression) {
   for (const auto& alloc : validAllocs) {
-    CompressedPtr c = m->compress(alloc.first);
+    CompressedPtr c = m->compress(alloc.first, false /* isMultiTiered */);
     folly::doNotOptimizeAway(c);
   }
 }
@@ -97,7 +98,7 @@ BENCHMARK(DeCompressAlt) {
 
 BENCHMARK_RELATIVE(DeCompress) {
   for (const auto& alloc : validAllocs) {
-    void* ptr = m->unCompress(alloc.second);
+    void* ptr = m->unCompress(alloc.second, false /* isMultiTiered */);
     folly::doNotOptimizeAway(ptr);
   }
 }