Skip to content

Commit 71bcd51

Browse files
author
Sara McAllister
committed
Kangaroo module connection
1 parent d3ef924 commit 71bcd51

File tree

9 files changed

+403
-3
lines changed

9 files changed

+403
-3
lines changed

cachelib/allocator/nvmcache/NavyConfig.cpp

+62
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,52 @@ BigHashConfig& BigHashConfig::setSizePctAndMaxItemSize(
229229
return *this;
230230
}
231231

232+
// Stores the Kangaroo size percentage together with the maximum item size.
//
// @param sizePct           percentage to store; must be within [0, 100].
//                          A value of 0 is accepted and only logged.
// @param smallItemMaxSize  maximum item size to store.
// @return reference to this config so that setters can be chained.
// @throw std::invalid_argument if sizePct is greater than 100; in that case
//        no member is modified.
KangarooConfig& KangarooConfig::setSizePctAndMaxItemSize(
    unsigned int sizePct, uint64_t smallItemMaxSize) {
  constexpr unsigned int kMaxPct = 100;
  const bool pctOutOfRange = sizePct > kMaxPct;
  if (pctOutOfRange) {
    const auto message = folly::sformat(
        "to enable Kangaroo, Kangaroo size pct should be in the range of [0, 100]"
        ", but {} is set",
        sizePct);
    throw std::invalid_argument(message);
  }

  // Zero is a legal value; it is only reported in the log.
  if (!sizePct) {
    XLOG(INFO) << "Kangaroo is not configured";
  }

  smallItemMaxSize_ = smallItemMaxSize;
  sizePct_ = sizePct;
  return *this;
}
247+
248+
// Stores the KangarooLog settings: size percentage, partition counts and
// threshold.
//
// All arguments are validated before any member is written, so a call that
// throws leaves the previously stored log settings untouched.
//
// @param sizePct                     percentage to store as the log size;
//                                    must be within [0, 100]. A value of 0
//                                    is accepted and only logged.
// @param physicalPartitions          number of physical partitions; must be
//                                    at least 1.
// @param indexPerPhysicalPartitions  number of index partitions per physical
//                                    partition; must be at least 1.
// @param threshold                   threshold value to store.
// @return reference to this config so that setters can be chained.
// @throw std::invalid_argument if sizePct is greater than 100 or if either
//        partition count is 0.
KangarooConfig& KangarooConfig::setLog(unsigned int sizePct,
                                       uint64_t physicalPartitions,
                                       uint64_t indexPerPhysicalPartitions,
                                       uint32_t threshold) {
  if (sizePct > 100) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, KangarooLog size pct should be in the range of [0, 100]"
        ", but {} is set",
        sizePct));
  }
  if (sizePct == 0) {
    XLOG(INFO) << "KangarooLog is not configured";
  }
  if (indexPerPhysicalPartitions == 0) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, need >=1 index partitions per physical partition, {} is set",
        indexPerPhysicalPartitions));
  }
  if (physicalPartitions == 0) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, need >=1 physical partitions, {} is set",
        physicalPartitions));
  }

  // Commit only after every check has passed.
  logSizePct_ = sizePct;
  physicalPartitions_ = physicalPartitions;
  indexPerPhysicalPartitions_ = indexPerPhysicalPartitions;
  threshold_ = threshold;
  return *this;
}
277+
232278
void NavyConfig::setBigHash(unsigned int bigHashSizePct,
233279
uint32_t bigHashBucketSize,
234280
uint64_t bigHashBucketBfSize,
@@ -238,6 +284,22 @@ void NavyConfig::setBigHash(unsigned int bigHashSizePct,
238284
.setBucketSize(bigHashBucketSize)
239285
.setBucketBfSize(bigHashBucketBfSize);
240286
}
287+
288+
// Forwards all Kangaroo parameters to kangarooConfig_ in a single call.
//
// @param kangarooSizePct                        passed to
//                                               setSizePctAndMaxItemSize.
// @param kangarooBucketSize                     passed to setBucketSize.
// @param kangarooBucketBfSize                   passed to setBucketBfSize.
// @param kangarooSmallItemMaxSize               passed to
//                                               setSizePctAndMaxItemSize.
// @param kangarooLogSizePct                     log size percentage for
//                                               setLog.
// @param kangarooLogThreshold                   threshold for setLog.
// @param kangarooLogPhysicalPartitions          physical partition count for
//                                               setLog.
// @param kangarooLogIndexPerPhysicalPartitions  index partitions per physical
//                                               partition for setLog.
// @throw std::invalid_argument propagated from the KangarooConfig setters.
void NavyConfig::setKangaroo(unsigned int kangarooSizePct,
                             uint32_t kangarooBucketSize,
                             uint64_t kangarooBucketBfSize,
                             uint64_t kangarooSmallItemMaxSize,
                             uint64_t kangarooLogSizePct,
                             uint64_t kangarooLogThreshold,
                             uint64_t kangarooLogPhysicalPartitions,
                             uint32_t kangarooLogIndexPerPhysicalPartitions) {
  // KangarooConfig::setLog takes (sizePct, physicalPartitions,
  // indexPerPhysicalPartitions, threshold); the threshold goes LAST.
  // The size percentage and threshold are wider here than the setter's
  // parameters, so the narrowing is spelled out explicitly.
  kangarooConfig_
      .setSizePctAndMaxItemSize(kangarooSizePct, kangarooSmallItemMaxSize)
      .setBucketSize(kangarooBucketSize)
      .setBucketBfSize(kangarooBucketBfSize)
      .setLog(static_cast<unsigned int>(kangarooLogSizePct),
              kangarooLogPhysicalPartitions,
              kangarooLogIndexPerPhysicalPartitions,
              static_cast<uint32_t>(kangarooLogThreshold));
}
241303
// job scheduler settings
242304
void NavyConfig::setNavyReqOrderingShards(uint64_t navyReqOrderingShards) {
243305
if (navyReqOrderingShards == 0) {

cachelib/allocator/nvmcache/NavyConfig.h

+111-2
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,90 @@ class BigHashConfig {
321321
uint64_t smallItemMaxSize_{};
322322
};
323323

324+
/**
 * KangarooConfig provides APIs for users to configure Kangaroo engine, which is
 * one part of NavyConfig.
 *
 * By this class, users can:
 * - enable Kangaroo by setting sizePct > 0
 * - set maximum item size
 * - set bucket size
 * - set bloom filter size (0 to disable bloom filter)
 * - set log percent, log threshold and number of log partitions
 * - get the values of all the above parameters
 */
class KangarooConfig {
 public:
  // Set Kangaroo device percentage and maximum item size(in bytes) to enable
  // Kangaroo engine. Default value of sizePct and smallItemMaxSize is 0,
  // meaning Kangaroo is not enabled.
  // @throw std::invalid_argument if sizePct is not in the range of
  //        [0, 100].
  KangarooConfig& setSizePctAndMaxItemSize(unsigned int sizePct,
                                           uint64_t smallItemMaxSize);

  // Set the bucket size in bytes for Kangaroo engine.
  // Default value is 4096.
  KangarooConfig& setBucketSize(uint32_t bucketSize) noexcept {
    bucketSize_ = bucketSize;
    return *this;
  }

  // Set bloom filter size per bucket in bytes for Kangaroo engine.
  // 0 means bloom filter will not be applied. Default value is 8.
  KangarooConfig& setBucketBfSize(uint64_t bucketBfSize) noexcept {
    bucketBfSize_ = bucketBfSize;
    return *this;
  }

  // Set the KangarooLog parameters: the percentage dedicated to the log, the
  // number of physical partitions, the number of index partitions per
  // physical partition, and the threshold for moving items from the log to
  // sets. Default values are 0, 1, 1 and 1 respectively.
  // @throw std::invalid_argument if sizePct is not in the range of
  //        [0, 100], or if either partition count is 0.
  KangarooConfig& setLog(unsigned int sizePct,
                         uint64_t physicalPartitions,
                         uint64_t indexPerPhysicalPartitions,
                         uint32_t threshold);

  // True when a non-zero bloom filter size is configured.
  bool isBloomFilterEnabled() const { return bucketBfSize_ > 0; }

  unsigned int getSizePct() const { return sizePct_; }

  uint32_t getBucketSize() const { return bucketSize_; }

  uint64_t getBucketBfSize() const { return bucketBfSize_; }

  uint64_t getSmallItemMaxSize() const { return smallItemMaxSize_; }

  unsigned int getLogSizePct() const { return logSizePct_; }

  uint64_t getPhysicalPartitions() const { return physicalPartitions_; }

  uint64_t getIndexPerPhysicalPartitions() const {
    return indexPerPhysicalPartitions_;
  }

  uint32_t getLogThreshold() const { return threshold_; }

 private:
  // Percentage of how much of the device out of all is given to Kangaroo
  // engine in Navy, e.g. 50.
  unsigned int sizePct_{0};
  // Navy Kangaroo engine's bucket size (must be multiple of the minimum
  // device io block size).
  // This size determines how big each bucket is and what is the physical
  // write granularity onto the device.
  uint32_t bucketSize_{4096};
  // The bloom filter size per bucket in bytes for Navy Kangaroo engine
  uint64_t bucketBfSize_{8};
  // The maximum item size to put into Navy Kangaroo engine.
  uint64_t smallItemMaxSize_{};
  // Percent of Kangaroo to dedicate to KangarooLog
  unsigned int logSizePct_{0};
  // Number of physical partitions of KangarooLog
  uint64_t physicalPartitions_{1};
  // Number of index partitions per physical partition of KangarooLog
  uint64_t indexPerPhysicalPartitions_{1};
  // Threshold for moving items from KangarooLog to sets
  uint32_t threshold_{1};
};
407+
324408
/**
325409
* NavyConfig provides APIs for users to set up Navy related settings for
326410
* NvmCache.
@@ -341,6 +425,7 @@ class NavyConfig {
341425
bool usesSimpleFile() const noexcept { return !fileName_.empty(); }
342426
bool usesRaidFiles() const noexcept { return raidPaths_.size() > 0; }
343427
bool isBigHashEnabled() const { return bigHashConfig_.getSizePct() > 0; }
428+
bool isKangarooEnabled() const { return kangarooConfig_.getSizePct() > 0; }
344429
std::map<std::string, std::string> serialize() const;
345430

346431
// Getters:
@@ -371,15 +456,21 @@ class NavyConfig {
371456
// Returns the threshold of classifying an item as small item or large item
372457
// for Navy engine.
373458
uint64_t getSmallItemThreshold() const {
374-
if (!isBigHashEnabled()) {
459+
if (isBigHashEnabled()) {
460+
return bigHashConfig_.getSmallItemMaxSize();
461+
} else if (isKangarooEnabled()) {
462+
return kangarooConfig_.getSmallItemMaxSize();
463+
} else {
375464
return 0;
376465
}
377-
return bigHashConfig_.getSmallItemMaxSize();
378466
}
379467

380468
// Return a const BigHashConfig to read values of its parameters.
381469
const BigHashConfig& bigHash() const { return bigHashConfig_; }
382470

471+
// Return a const KangarooConfig to read values of its parameters.
472+
const KangarooConfig& kangaroo() const { return kangarooConfig_; }
473+
383474
// Return a const BlockCacheConfig to read values of its parameters.
384475
const BlockCacheConfig& blockCache() const { return blockCacheConfig_; }
385476

@@ -506,6 +597,21 @@ class NavyConfig {
506597
uint64_t bigHashSmallItemMaxSize);
507598
// Return BigHashConfig for configuration.
508599
BigHashConfig& bigHash() noexcept { return bigHashConfig_; }
600+
601+
// ============ Kangaroo settings =============
602+
// (Deprecated) Set the parameters for Kangaroo.
603+
// @throw std::invalid_argument if kangarooSizePct is not in the range of
604+
// 0~100.
605+
void setKangaroo(unsigned int kangarooSizePct,
606+
uint32_t kangarooBucketSize,
607+
uint64_t kangarooBucketBfSize,
608+
uint64_t kangarooSmallItemMaxSize,
609+
uint64_t kangarooLogSizePct,
610+
uint64_t kangarooLogThreshold,
611+
uint64_t kangarooLogPhysicalPartitions,
612+
uint32_t kangarooLogIndexPerPhysicalPartitions);
613+
// Return KangarooConfig for configuration.
614+
KangarooConfig& kangaroo() noexcept { return kangarooConfig_; }
509615

510616
// ============ Job scheduler settings =============
511617
void setReaderAndWriterThreads(unsigned int readerThreads,
@@ -556,6 +662,9 @@ class NavyConfig {
556662

557663
// ============ BigHash settings =============
558664
BigHashConfig bigHashConfig_{};
665+
666+
// ============ Kangaroo settings =============
667+
KangarooConfig kangarooConfig_{};
559668

560669
// ============ Job scheduler settings =============
561670
// Number of asynchronous worker thread for read operation.

cachelib/allocator/nvmcache/NavySetup.cpp

+70
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,66 @@ uint64_t setupBigHash(const navy::BigHashConfig& bigHashConfig,
9393
return bigHashCacheOffset;
9494
}
9595

96+
// Builds the Kangaroo engine proto from kangarooConfig and registers it on
// proto.
//
// @param kangarooConfig  source of bucket size, size percentage, bloom filter
//                        size, log settings and small item max size.
// @param ioAlignSize     the bucket size must already be aligned to this.
// @param totalCacheSize  total size from which the Kangaroo share is taken.
// @param metadataSize    the computed Kangaroo offset must lie strictly
//                        beyond this value.
// @param proto           receives the Kangaroo proto via setKangaroo().
// @return the offset at which the Kangaroo region starts.
// @throw std::invalid_argument if the bucket size is not aligned to
//        ioAlignSize, or if the Kangaroo offset is not greater than
//        metadataSize (checked after proto has been updated).
uint64_t setupKangaroo(const navy::KangarooConfig& kangarooConfig,
                       uint32_t ioAlignSize,
                       uint64_t totalCacheSize,
                       uint64_t metadataSize,
                       cachelib::navy::CacheProto& proto) {
  const auto bktSize = kangarooConfig.getBucketSize();
  const bool bucketAligned = (alignUp(bktSize, ioAlignSize) == bktSize);
  if (!bucketAligned) {
    throw std::invalid_argument(
        folly::sformat("Bucket size: {} is not aligned to ioAlignSize: {}",
                       bktSize, ioAlignSize));
  }

  // The Kangaroo region is carved from the tail of the cache: take its
  // percentage share, round the start up to a bucket boundary and the length
  // down to a whole number of buckets.
  const auto reservedBytes =
      totalCacheSize * kangarooConfig.getSizePct() / 100ul;
  const uint64_t regionOffset =
      alignUp(totalCacheSize - reservedBytes, bktSize);
  const uint64_t regionSize =
      alignDown(totalCacheSize - regionOffset, bktSize);

  auto kangarooProto = cachelib::navy::createKangarooProto();
  kangarooProto->setLayout(regionOffset, regionSize, bktSize);

  // Bucket Bloom filter size, bytes
  //
  // Experiments showed that if we have 16 bytes for BF with 25 entries,
  // then optimal number of hash functions is 4 and false positive rate
  // below 10%.
  if (kangarooConfig.isBloomFilterEnabled()) {
    // The hash count is fixed at 4; the configured filter bytes are split
    // evenly between the hash functions.
    constexpr uint32_t kHashCount = 4;
    const uint32_t bitsForEachHash =
        kangarooConfig.getBucketBfSize() * 8 / kHashCount;
    kangarooProto->setBloomFilter(kHashCount, bitsForEachHash);
  }

  // The log is only set up for a non-zero log percentage.
  if (kangarooConfig.getLogSizePct() != 0) {
    // Log size is the configured share of the Kangaroo region, rounded down
    // to a multiple of 64 buckets.
    const uint64_t logBytes =
        alignDown(regionSize * kangarooConfig.getLogSizePct() / 100ul,
                  bktSize * 64);
    const uint32_t logThreshold = kangarooConfig.getLogThreshold();
    const uint64_t indexPartsPerPhysical =
        kangarooConfig.getIndexPerPhysicalPartitions();
    const uint64_t physicalParts = kangarooConfig.getPhysicalPartitions();
    kangarooProto->setLog(
        logBytes, logThreshold, physicalParts, indexPartsPerPhysical);
  }

  proto.setKangaroo(std::move(kangarooProto),
                    kangarooConfig.getSmallItemMaxSize());

  // The region must begin strictly past the metadata.
  if (!(regionOffset > metadataSize)) {
    throw std::invalid_argument("NVM cache size is not big enough!");
  }
  XLOG(INFO) << "metadataSize: " << metadataSize
             << " kangarooCacheOffset: " << regionOffset
             << " kangarooCacheSize: " << regionSize;
  return regionOffset;
}
155+
96156
void setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,
97157
uint64_t blockCacheSize,
98158
uint32_t ioAlignSize,
@@ -204,6 +264,16 @@ void setupCacheProtos(const navy::NavyConfig& config,
204264
XLOG(INFO) << "metadataSize: " << metadataSize << ". No bighash.";
205265
blockCacheSize = totalCacheSize - metadataSize;
206266
}
267+
268+
// Set up Kangaroo if enabled
269+
if (config.isKangarooEnabled()) {
270+
auto kangarooCacheOffset = setupKangaroo(config.kangaroo(), ioAlignSize,
271+
totalCacheSize, metadataSize, proto);
272+
blockCacheSize = kangarooCacheOffset - metadataSize;
273+
} else {
274+
XLOG(INFO) << "metadataSize: " << metadataSize << ". No kangaroo.";
275+
blockCacheSize = totalCacheSize - metadataSize;
276+
}
207277

208278
// Set up BlockCache if enabled
209279
if (blockCacheSize > 0) {

cachelib/cachebench/cache/Cache-inl.h

+12
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,17 @@ Cache<Allocator>::Cache(const CacheConfig& config,
184184
config_.navyBloomFilterPerBucketSize,
185185
config_.navySmallItemMaxSize);
186186
}
187+
188+
if (config_.navyKangarooSizePct > 0) {
189+
nvmConfig.navyConfig.setKangaroo(config_.navyKangarooSizePct,
190+
config_.navyKangarooBucketSize,
191+
config_.navyBloomFilterPerBucketSize,
192+
config_.navySmallItemMaxSize,
193+
config_.navyKangarooLogSizePct,
194+
config_.navyKangarooLogThreshold,
195+
config_.navyKangarooLogPhysicalPartitions,
196+
config_.navyKangarooLogIndexPerPhysicalPartitions);
197+
}
187198

188199
nvmConfig.navyConfig.setMaxParcelMemoryMB(config_.navyParcelMemoryMB);
189200

@@ -524,6 +535,7 @@ Stats Cache<Allocator>::getStats() const {
524535
ret.numNvmItems = lookup("navy_bh_items") + lookup("navy_bc_items");
525536
ret.numNvmBytesWritten = lookup("navy_device_bytes_written");
526537
uint64_t now = fetchNandWrites();
538+
527539
if (now > nandBytesBegin_) {
528540
ret.numNvmNandBytesWritten = now - nandBytesBegin_;
529541
}

cachelib/cachebench/cache/Cache.h

+1
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ class Cache {
347347

348348
// reading of the nand bytes written for the benchmark if enabled.
349349
const uint64_t nandBytesBegin_{0};
350+
uint64_t writtenBytes_{0};
350351

351352
// latency stats of cachelib APIs inside cachebench
352353
mutable util::PercentileStats cacheFindLatency_;

cachelib/cachebench/util/CacheConfig.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
7474
JSONSetVal(configJson, navyWriterThreads);
7575
JSONSetVal(configJson, navyCleanRegions);
7676
JSONSetVal(configJson, navyAdmissionWriteRateMB);
77+
JSONSetVal(configJson, navyAdmissionProb);
7778
JSONSetVal(configJson, navyMaxConcurrentInserts);
7879
JSONSetVal(configJson, navyDataChecksum);
7980
JSONSetVal(configJson, navyNumInmemBuffers);
@@ -85,6 +86,13 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
8586

8687
JSONSetVal(configJson, mlNvmAdmissionPolicy);
8788
JSONSetVal(configJson, mlNvmAdmissionPolicyLocation);
89+
90+
JSONSetVal(configJson, navyKangarooSizePct);
91+
JSONSetVal(configJson, navyKangarooBucketSize);
92+
JSONSetVal(configJson, navyKangarooLogSizePct);
93+
JSONSetVal(configJson, navyKangarooLogThreshold);
94+
JSONSetVal(configJson, navyKangarooLogPhysicalPartitions);
95+
JSONSetVal(configJson, navyKangarooLogIndexPerPhysicalPartitions);
8896

8997
JSONSetVal(configJson, useTraceTimeStamp);
9098
JSONSetVal(configJson, printNvmCounters);
@@ -95,7 +103,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
95103
// if you added new fields to the configuration, update the JSONSetVal
96104
// to make them available for the json configs and increment the size
97105
// below
98-
checkCorrectSize<CacheConfig, 680>();
106+
checkCorrectSize<CacheConfig, 736>();
99107

100108
if (numPools != poolSizes.size()) {
101109
throw std::invalid_argument(folly::sformat(

0 commit comments

Comments
 (0)