Compressed pointer addressing in single and 2-tier mode #188
@@ -27,18 +27,29 @@ namespace cachelib {
 class SlabAllocator;

-// the following are for pointer compression for the memory allocator. We
-// compress pointers by storing the slab index and the alloc index of the
-// allocation inside the slab. With slab worth kNumSlabBits of data, if we
-// have the min allocation size as 64 bytes, that requires kNumSlabBits - 6
-// bits for storing the alloc index. This leaves the remaining (32 -
-// (kNumSlabBits - 6)) bits for the slab index. Hence we can index 256 GiB
-// of memory in slabs and index anything more than 64 byte allocations inside
-// the slab using a 32 bit representation.
-//
 // This CompressedPtr makes decompression fast by staying away from division and
 // modulo arithmetic and doing those during the compression time. We most often
-// decompress a CompressedPtr than compress a pointer while creating one.
+// decompress a CompressedPtr than compress a pointer while creating one. This
+// is used for pointer compression by the memory allocator.
+
+// We compress pointers by storing the tier index, slab index and alloc index
+// of the allocation inside the slab.
+
+// In the original design (without memory tiers):
+// Each slab addresses 22 bits of allocations (kNumSlabBits). This is split into
+// the allocation index and the allocation size. With a min allocation size of
+// 64 bytes (kMinAllocPower = 6 bits), kNumSlabBits(22) - kMinAllocPower(6) =
+// 16 bits are used for storing the alloc index. This leaves the remaining
+// 32 - (kNumSlabBits - kMinAllocPower) = 16 bits for the slab index. Hence we
+// can index 256 GiB of memory.
+
+// In the multi-tier design:
+// kNumSlabBits and kMinAllocPower remain unchanged. The tier id occupies the
+// 32nd bit only, since its value cannot exceed kMaxTiers(2). This leaves
+// 32 - (kNumSlabBits - kMinAllocPower) - 1 (tier id bit) = 15 bits for the
+// slab index. Hence we can index 128 GiB of memory per tier in the
+// multi-tier configuration.

 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
   using PtrType = uint32_t;
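As a quick sanity check on the arithmetic in the comment above, the following standalone sketch (not part of this PR) re-derives the 256 GiB and 128 GiB figures. The names mirror CacheLib's constants, but everything here is self-contained and assumes a 64-bit size_t.

// Re-derives the addressable-memory figures from the comment block above.
#include <cstddef>
#include <cstdint>

namespace sketch {
constexpr unsigned kNumSlabBits = 22;   // each slab holds 4 MiB
constexpr unsigned kMinAllocPower = 6;  // minimum allocation is 64 bytes
constexpr unsigned kNumPtrBits = 32;    // CompressedPtr is a uint32_t

// 22 - 6 = 16 bits identify an allocation inside a slab.
constexpr unsigned kNumAllocIdxBits = kNumSlabBits - kMinAllocPower;

// Single tier: the remaining 16 bits index slabs -> 2^16 * 4 MiB = 256 GiB.
constexpr unsigned kSlabIdxBitsSingleTier = kNumPtrBits - kNumAllocIdxBits;
static_assert(kSlabIdxBitsSingleTier == 16, "16 slab-index bits");
static_assert((std::size_t{1} << (kSlabIdxBitsSingleTier + kNumSlabBits)) ==
                  (std::size_t{256} << 30),
              "single-tier mode addresses 256 GiB");

// Multi tier: the MSB holds the tier id, 15 bits remain -> 128 GiB per tier.
constexpr unsigned kSlabIdxBitsMultiTier = kNumPtrBits - kNumAllocIdxBits - 1;
static_assert(kSlabIdxBitsMultiTier == 15, "15 slab-index bits");
static_assert((std::size_t{1} << (kSlabIdxBitsMultiTier + kNumSlabBits)) ==
                  (std::size_t{128} << 30),
              "multi-tier mode addresses 128 GiB per tier");
}  // namespace sketch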
@@ -62,9 +73,10 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
     return static_cast<uint32_t>(1) << (Slab::kMinAllocPower);
   }

-  // maximum adressable memory for pointer compression to work.
+  // maximum addressable memory for pointer compression to work.
   static constexpr size_t getMaxAddressableSize() noexcept {
-    return static_cast<size_t>(1) << (kNumSlabIdxBits + Slab::kNumSlabBits);
+    return static_cast<size_t>(1)
+           << (numSlabIdxBits(false) + Slab::kNumSlabBits);
   }

   // default construct to nullptr.
@@ -89,8 +101,11 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   PtrType ptr_{kNull};

   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx)
-      : ptr_(compress(slabIdx, allocIdx)) {}
+  CompressedPtr(uint32_t slabIdx,
+                uint32_t allocIdx,
+                bool isMultiTiered,
+                TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, isMultiTiered, tid)) {}

   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
@@ -100,33 +115,63 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;

+  // Use the 32nd bit position for the TierId.
+  static constexpr unsigned int kNumTierIdxOffset = 31;
+
   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;

-  // Number of bits for the slab index. This will be the top 16 bits of the
-  // compressed ptr.
-  static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumAllocIdxBits;
+  // Mask for the tier id bit (the most significant bit of the compressed ptr).
+  static constexpr PtrType kTierIdxMask = (PtrType)1 << kNumTierIdxOffset;
+
+  // Number of bits for the slab index.
+  // If CacheLib is single-tiered, the slab index will be the top 16 bits
+  // of the compressed ptr.
+  // If CacheLib is multi-tiered, the topmost 32nd bit will be reserved for
+  // the tier id and the following 15 bits will be reserved for the slab
+  // index.
+  static constexpr unsigned int numSlabIdxBits(bool isMultiTiered) {
+    return kNumTierIdxOffset - kNumAllocIdxBits + (!isMultiTiered);
+  }

   // Compress the given slabIdx and allocIdx into a 32-bit compressed
   // pointer.
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx) noexcept {
+  static PtrType compress(uint32_t slabIdx,
+                          uint32_t allocIdx,
+                          bool isMultiTiered,
+                          TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
-    XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
-    return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    XDCHECK_LT(slabIdx, (1u << numSlabIdxBits(isMultiTiered)) - 1);
+    if (!isMultiTiered) {
+      return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    }
+    return (static_cast<uint32_t>(tid) << kNumTierIdxOffset) +
+           (slabIdx << kNumAllocIdxBits) + allocIdx;
   }

   // Get the slab index of the compressed ptr
-  uint32_t getSlabIdx() const noexcept {
+  uint32_t getSlabIdx(bool isMultiTiered) const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumAllocIdxBits);
+    auto noTierIdPtr = isMultiTiered ? ptr_ & ~kTierIdxMask : ptr_;
+    return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }

   // Get the allocation index of the compressed ptr
   uint32_t getAllocIdx() const noexcept {
     XDCHECK(!isNull());
+    // Note: a tid check on ptr_ is not required since only
+    // the lower 16 bits are being read here.
     return static_cast<uint32_t>(ptr_ & kAllocIdxMask);
   }

+  uint32_t getTierId(bool isMultiTiered) const noexcept {
+    XDCHECK(!isNull());
+    return isMultiTiered ? static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset) : 0;
+  }
+
+  void setTierId(TierId tid) noexcept {
+    ptr_ += static_cast<uint32_t>(tid) << kNumTierIdxOffset;
+  }
+
   friend SlabAllocator;
 };

Review comment on lines +171 to +172 (setTierId):
What is the use of this? Is this needed for succeeding changes?

Reply:
Yes, it will be referred to in subsequent upstream PRs. For the complete picture, please refer to intel#56.
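For illustration, here is a self-contained sketch (not from the PR) of the bit packing that compress(), getSlabIdx(), getAllocIdx() and getTierId() above implement, written as free functions so the round trip can be checked in isolation. The constants follow the diff; representing the tier id as a plain uint32_t is an assumption made for brevity.

#include <cassert>
#include <cstdint>

namespace sketch {
constexpr unsigned kNumAllocIdxBits = 16;
constexpr unsigned kNumTierIdxOffset = 31;
constexpr std::uint32_t kAllocIdxMask = (1u << kNumAllocIdxBits) - 1;
constexpr std::uint32_t kTierIdxMask = 1u << kNumTierIdxOffset;

// Pack tier id (MSB, multi-tier only), slab index, and alloc index.
std::uint32_t compress(std::uint32_t slabIdx, std::uint32_t allocIdx,
                       bool isMultiTiered, std::uint32_t tid) {
  if (!isMultiTiered) {
    return (slabIdx << kNumAllocIdxBits) + allocIdx;
  }
  return (tid << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
}

// Drop the tier bit (if any) before shifting out the alloc-index bits.
std::uint32_t getSlabIdx(std::uint32_t ptr, bool isMultiTiered) {
  auto noTierIdPtr = isMultiTiered ? (ptr & ~kTierIdxMask) : ptr;
  return noTierIdPtr >> kNumAllocIdxBits;
}

// Only the lower 16 bits are read, so the tier bit never interferes.
std::uint32_t getAllocIdx(std::uint32_t ptr) { return ptr & kAllocIdxMask; }

std::uint32_t getTierId(std::uint32_t ptr, bool isMultiTiered) {
  return isMultiTiered ? (ptr >> kNumTierIdxOffset) : 0;
}
}  // namespace sketch

int main() {
  // Multi-tier round trip: tier 1, slab 12345 (fits in 15 bits), alloc 678.
  auto p = sketch::compress(12345, 678, /*isMultiTiered=*/true, /*tid=*/1);
  assert(sketch::getTierId(p, true) == 1);
  assert(sketch::getSlabIdx(p, true) == 12345);
  assert(sketch::getAllocIdx(p) == 678);
  return 0;
}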
@@ -137,11 +182,12 @@ class PtrCompressor {
       : allocator_(allocator) {}

   const CompressedPtr compress(const PtrType* uncompressed) const {
-    return allocator_.compress(uncompressed);
+    return allocator_.compress(uncompressed, false /* isMultiTiered */);
   }

   PtrType* unCompress(const CompressedPtr compressed) const {
-    return static_cast<PtrType*>(allocator_.unCompress(compressed));
+    return static_cast<PtrType*>(
+        allocator_.unCompress(compressed, false /* isMultiTiered */));
   }

   bool operator==(const PtrCompressor& rhs) const noexcept {
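A hedged usage sketch of the wrapper pattern above: MockAllocator, Item and ItemPtrCompressor are hypothetical stand-ins invented for this example; only the shape of compress()/unCompress() and the hard-coded false /* isMultiTiered */ mirror the diff, which is what keeps single-tier behavior unchanged for now.

#include <cassert>
#include <cstdint>

struct Item { int value; };

struct MockAllocator {
  // Stand-ins for the allocator's compress()/unCompress(); a real allocator
  // translates between raw pointers and 32-bit compressed values instead.
  std::uintptr_t compress(const Item* p, bool /*isMultiTiered*/) const {
    return reinterpret_cast<std::uintptr_t>(p);
  }
  void* unCompress(std::uintptr_t c, bool /*isMultiTiered*/) const {
    return reinterpret_cast<void*>(c);
  }
};

class ItemPtrCompressor {
 public:
  explicit ItemPtrCompressor(const MockAllocator& allocator)
      : allocator_(allocator) {}

  std::uintptr_t compress(const Item* uncompressed) const {
    // Mirrors the PR: the wrapper pins isMultiTiered to false for now.
    return allocator_.compress(uncompressed, false /* isMultiTiered */);
  }

  Item* unCompress(std::uintptr_t compressed) const {
    return static_cast<Item*>(
        allocator_.unCompress(compressed, false /* isMultiTiered */));
  }

 private:
  const MockAllocator& allocator_;
};

int main() {
  MockAllocator alloc;
  ItemPtrCompressor compressor(alloc);
  Item item{42};
  auto c = compressor.compress(&item);
  assert(compressor.unCompress(c)->value == 42);
  return 0;
}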
@@ -225,7 +225,8 @@ class SlabAllocator {
   // the corresponding memory allocator. trying to inline this just increases
   // the code size and does not move the needle on the benchmarks much.
   // Calling this with invalid input in optimized build is undefined behavior.
-  CompressedPtr CACHELIB_INLINE compress(const void* ptr) const {
+  CompressedPtr CACHELIB_INLINE compress(const void* ptr,
+                                         bool isMultiTiered) const {
     if (ptr == nullptr) {
       return CompressedPtr{};
     }
@@ -246,18 +247,23 @@ class SlabAllocator {
         static_cast<uint32_t>(reinterpret_cast<const uint8_t*>(ptr) -
                               reinterpret_cast<const uint8_t*>(slab)) /
         allocSize;
-    return CompressedPtr{slabIndex, allocIdx};
+    return CompressedPtr{slabIndex, allocIdx, isMultiTiered};
   }

Review comment on lines -249 to +250:
So, the tier id is not set here. How are you going to provide the tier id? I think you are going to have a separate [...]. What about the [...]?

Reply:
Please refer to commit intel@2704ac8#diff-a6542b6dbf2cfb5e03e82205ee960757ab2b50de7bc25085089a3cffba40ae87, which shows how the tier id is passed to the allocator.compress() methods. That change is due to be sent for upstream review soon.

   // uncompress the point and return the raw ptr. This function never throws
   // in optimized build and assumes that the caller is responsible for calling
   // it with a valid compressed pointer.
-  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr) const {
+  void* CACHELIB_INLINE unCompress(const CompressedPtr ptr,
+                                   bool isMultiTiered) const {
     if (ptr.isNull()) {
       return nullptr;
     }

-    const SlabIdx slabIndex = ptr.getSlabIdx();
+    /* TODO: isMultiTiered is set to false by default.
+       The multi-tiering flag will have no impact till the
+       rest of the multi-tiering changes are merged.
+    */
+    const SlabIdx slabIndex = ptr.getSlabIdx(isMultiTiered);
     const uint32_t allocIdx = ptr.getAllocIdx();
     const Slab* slab = &slabMemoryStart_[slabIndex];

Review comment:
Can you add a comment/TODO here just to clarify that multi-tier has no effect here in accessing slab memory, since we haven't incorporated the actual multi-tier logic yet?

Reply:
Done.
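To make the compress/unCompress address arithmetic concrete, here is a standalone sketch (not part of the PR) using plain byte arrays instead of the real Slab and SlabAllocator types. The 4 MiB slab size follows kNumSlabBits = 22; slabMemoryStart and allocSize are stand-ins for per-slab state the real allocator looks up.

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kSlabSize = std::size_t{1} << 22;  // 4 MiB per slab

// compress(): the slab index comes from the pointer's offset in the slab
// region, the alloc index from dividing the intra-slab offset by allocSize.
void compressSketch(const std::uint8_t* slabMemoryStart,
                    const std::uint8_t* ptr, std::uint32_t allocSize,
                    std::uint32_t& slabIndex, std::uint32_t& allocIdx) {
  const std::size_t byteOffset =
      static_cast<std::size_t>(ptr - slabMemoryStart);
  slabIndex = static_cast<std::uint32_t>(byteOffset / kSlabSize);
  allocIdx = static_cast<std::uint32_t>((byteOffset % kSlabSize) / allocSize);
}

// unCompress(): no division or modulo, only a multiply and adds, which is
// what makes decompression cheap.
const std::uint8_t* unCompressSketch(const std::uint8_t* slabMemoryStart,
                                     std::uint32_t slabIndex,
                                     std::uint32_t allocIdx,
                                     std::uint32_t allocSize) {
  return slabMemoryStart + slabIndex * kSlabSize +
         static_cast<std::size_t>(allocIdx) * allocSize;
}

int main() {
  alignas(64) static std::uint8_t region[2 * kSlabSize];  // two fake slabs
  const std::uint32_t allocSize = 64;
  const std::uint8_t* p = region + kSlabSize + 5 * allocSize;  // slab 1, alloc 5
  std::uint32_t slabIndex = 0, allocIdx = 0;
  compressSketch(region, p, allocSize, slabIndex, allocIdx);
  assert(slabIndex == 1 && allocIdx == 5);
  assert(unCompressSketch(region, slabIndex, allocIdx, allocSize) == p);
  return 0;
}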
Review comment:
isMultiTiered looks redundant. We are storing the tid at the MSB of the compressed ptr. In this case, we can just interpret isMultiTiered = false if tid == 0. The additional logic in CompressedPtr can be simplified a lot in this way.

Reply:
The bit-packing format changes for the multi-tier compressed pointer. The 32nd bit is reserved for the tid only in the multi-tiered compressed pointer. When the config is single-tiered, the compressed pointer bit packing uses the original design: the 32nd bit is not reserved for the tid. tid == 0 means cachelib may or may not be multi-tiered.
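To make the point in the reply above concrete: under the single-tier layout the 32nd bit belongs to the slab index, so a compressed pointer with a large slab index sets that bit even though there is no tier id. The small standalone example below (not part of the PR) shows the same 32-bit value decoding differently under the two layouts, which is why tid == 0 cannot be used to infer single-tier mode. Constants follow the diff (16 alloc-index bits, MSB as tier id when multi-tiered).

#include <cassert>
#include <cstdint>

int main() {
  constexpr unsigned kNumAllocIdxBits = 16;
  constexpr unsigned kNumTierIdxOffset = 31;

  // Single-tier encoding of slab 40000 (which needs all 16 slab-index bits,
  // so the MSB of the compressed value is set) and alloc index 7.
  const std::uint32_t ptr = (40000u << kNumAllocIdxBits) + 7u;

  // Decoded with the single-tier layout: slab 40000, alloc 7.
  assert((ptr >> kNumAllocIdxBits) == 40000u);

  // Naively decoded with the multi-tier layout, the same bits would read as
  // tier 1 and slab 40000 - 32768 = 7232 -- a different allocation entirely.
  assert((ptr >> kNumTierIdxOffset) == 1u);
  assert(((ptr & ~(1u << kNumTierIdxOffset)) >> kNumAllocIdxBits) == 7232u);
  return 0;
}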