-
Notifications
You must be signed in to change notification settings - Fork 277
/
Copy pathNavySetup.cpp
373 lines (323 loc) · 14 KB
/
NavySetup.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "cachelib/allocator/nvmcache/NavySetup.h"
#include <folly/File.h>
#include <folly/logging/xlog.h>
#include "cachelib/allocator/nvmcache/NavyConfig.h"
#include "cachelib/navy/Factory.h"
#include "cachelib/navy/block_cache/HitsReinsertionPolicy.h"
#include "cachelib/navy/scheduler/JobScheduler.h"
namespace facebook {
namespace cachelib {
namespace {
// Default share of the device reserved for metadata, expressed as a
// percentage (the value is divided by 100 where it is applied).
// 0.5% of an (almost) 1TB flash device = 5GB reserved for metadata.
constexpr double kDefaultMetadataPercent = 0.5;
// Converts a size expressed in megabytes (2^20 bytes each) into bytes.
uint64_t megabytesToBytes(uint64_t mb) {
  constexpr uint64_t kBytesPerMegabyte = uint64_t{1} << 20;
  return mb * kBytesPerMegabyte;
}
// Rounds @num down to a multiple of @alignment: the result is the largest
// multiple of @alignment that does not exceed @num.
// @alignment must be non-zero.
uint64_t alignDown(uint64_t num, uint64_t alignment) {
  const uint64_t wholeUnits = num / alignment;
  return wholeUnits * alignment;
}
// Rounds @num up to a multiple of @alignment: the result is the smallest
// multiple of @alignment that is greater than or equal to @num.
// @alignment must be non-zero.
//
// The computation works from the remainder instead of adding
// (@alignment - 1) first, so a @num that is already aligned is returned
// unchanged even when it sits close to the top of the uint64_t range.
// If the rounded-up value itself does not fit in uint64_t the result wraps.
uint64_t alignUp(uint64_t num, uint64_t alignment) {
  const uint64_t remainder = num % alignment;
  if (remainder == 0) {
    return num;
  }
  return num + (alignment - remainder);
}
// Builds the BigHash proto from @bigHashConfig and installs it on @proto.
//
// BigHash is placed at the end of the device: its share is
// getSizePct() percent of @totalCacheSize, with the start offset rounded up
// and the size rounded down to the bucket size.
//
// @param bigHashConfig   BigHash section of the Navy config
// @param ioAlignSize     IO alignment; the bucket size must be a multiple
// @param totalCacheSize  total device size in bytes
// @param metadataSize    bytes reserved for metadata at the device start
// @param proto           CacheProto that receives the BigHash proto
//
// @return the device offset at which BigHash's storage starts
//
// @throw std::invalid_argument if the bucket size is not aligned to
//        @ioAlignSize, or if BigHash's offset does not lie past the metadata
uint64_t setupBigHash(const navy::BigHashConfig& bigHashConfig,
                      uint32_t ioAlignSize,
                      uint64_t totalCacheSize,
                      uint64_t metadataSize,
                      cachelib::navy::CacheProto& proto) {
  const auto bucketBytes = bigHashConfig.getBucketSize();
  if (alignUp(bucketBytes, ioAlignSize) != bucketBytes) {
    throw std::invalid_argument(
        folly::sformat("Bucket size: {} is not aligned to ioAlignSize: {}",
                       bucketBytes, ioAlignSize));
  }

  // If enabled, BigHash's storage starts after BlockCache's.
  const auto reservedBytes =
      totalCacheSize * bigHashConfig.getSizePct() / 100ul;
  const uint64_t startOffset =
      alignUp(totalCacheSize - reservedBytes, bucketBytes);
  const uint64_t usableBytes =
      alignDown(totalCacheSize - startOffset, bucketBytes);

  auto bigHashProto = cachelib::navy::createBigHashProto();
  bigHashProto->setLayout(startOffset, usableBytes, bucketBytes);

  // Per-bucket Bloom filter, sized in bytes by the config.
  //
  // Experiments showed that with 16 bytes of Bloom filter for 25 entries the
  // optimal number of hash functions is 4 and the false positive rate stays
  // below 10%.
  if (bigHashConfig.isBloomFilterEnabled()) {
    // Always use 4 hash functions; this seems to be the best for our use
    // case. If the Bloom filter to bucket size ratio gets lower, try to
    // reduce the number of hashes.
    constexpr uint32_t kNumHashes = 4;
    const uint32_t bitsPerHash =
        bigHashConfig.getBucketBfSize() * 8 / kNumHashes;
    bigHashProto->setBloomFilter(kNumHashes, bitsPerHash);
  }

  proto.setBigHash(std::move(bigHashProto),
                   bigHashConfig.getSmallItemMaxSize());

  // BigHash must begin strictly after the metadata region.
  if (metadataSize >= startOffset) {
    throw std::invalid_argument("NVM cache size is not big enough!");
  }

  XLOG(INFO) << "metadataSize: " << metadataSize
             << " bigHashCacheOffset: " << startOffset
             << " bigHashCacheSize: " << usableBytes;
  return startOffset;
}
// Builds the Kangaroo proto from @kangarooConfig and installs it on @proto.
//
// Kangaroo is placed at the end of the device: its share is getSizePct()
// percent of @totalCacheSize, with the start offset rounded up and the size
// rounded down to the bucket size. When the config requests a log
// (getLogSizePct() is non-zero), the log size is that percentage of the
// Kangaroo region rounded down to a multiple of 64 buckets.
//
// @param kangarooConfig  Kangaroo section of the Navy config
// @param ioAlignSize     IO alignment; the bucket size must be a multiple
// @param totalCacheSize  total device size in bytes
// @param metadataSize    bytes reserved for metadata at the device start
// @param proto           CacheProto that receives the Kangaroo proto
//
// @return the device offset at which Kangaroo's storage starts
//
// @throw std::invalid_argument if the bucket size is not aligned to
//        @ioAlignSize, or if Kangaroo's offset does not lie past the metadata
uint64_t setupKangaroo(const navy::KangarooConfig& kangarooConfig,
                       uint32_t ioAlignSize,
                       uint64_t totalCacheSize,
                       uint64_t metadataSize,
                       cachelib::navy::CacheProto& proto) {
  const auto bucketBytes = kangarooConfig.getBucketSize();
  if (alignUp(bucketBytes, ioAlignSize) != bucketBytes) {
    throw std::invalid_argument(
        folly::sformat("Bucket size: {} is not aligned to ioAlignSize: {}",
                       bucketBytes, ioAlignSize));
  }

  // If enabled, Kangaroo storage starts after BlockCache's.
  const auto reservedBytes =
      totalCacheSize * kangarooConfig.getSizePct() / 100ul;
  const uint64_t startOffset =
      alignUp(totalCacheSize - reservedBytes, bucketBytes);
  const uint64_t usableBytes =
      alignDown(totalCacheSize - startOffset, bucketBytes);

  auto kangarooProto = cachelib::navy::createKangarooProto();
  kangarooProto->setLayout(startOffset, usableBytes, bucketBytes);

  // Per-bucket Bloom filter, sized in bytes by the config.
  //
  // Experiments showed that with 16 bytes of Bloom filter for 25 entries the
  // optimal number of hash functions is 4 and the false positive rate stays
  // below 10%.
  if (kangarooConfig.isBloomFilterEnabled()) {
    // Always use 4 hash functions; this seems to be the best for our use
    // case. If the Bloom filter to bucket size ratio gets lower, try to
    // reduce the number of hashes.
    constexpr uint32_t kNumHashes = 4;
    const uint32_t bitsPerHash =
        kangarooConfig.getBucketBfSize() * 8 / kNumHashes;
    kangarooProto->setBloomFilter(kNumHashes, bitsPerHash);
  }

  // Optional log in front of the buckets; only set when a non-zero
  // percentage is configured.
  if (kangarooConfig.getLogSizePct() != 0) {
    const uint64_t logBytes =
        alignDown(usableBytes * kangarooConfig.getLogSizePct() / 100ul,
                  bucketBytes * 64);
    const uint32_t logThreshold = kangarooConfig.getLogThreshold();
    const uint64_t indexPartitionsPerPhysical =
        kangarooConfig.getIndexPerPhysicalPartitions();
    const uint64_t physicalPartitions = kangarooConfig.getPhysicalPartitions();
    kangarooProto->setLog(logBytes, logThreshold, physicalPartitions,
                          indexPartitionsPerPhysical);
  }

  proto.setKangaroo(std::move(kangarooProto),
                    kangarooConfig.getSmallItemMaxSize());

  // Kangaroo must begin strictly after the metadata region.
  if (metadataSize >= startOffset) {
    throw std::invalid_argument("NVM cache size is not big enough!");
  }

  XLOG(INFO) << "metadataSize: " << metadataSize
             << " kangarooCacheOffset: " << startOffset
             << " kangarooCacheSize: " << usableBytes;
  return startOffset;
}
// Builds the BlockCache proto from @blockCacheConfig and installs it on
// @proto.
//
// The block cache starts right after the metadata. When RAID files are used
// the start is moved up to the next region boundary (the region size is what
// is used as the stripe size with RAID0Device) and the size shrinks by the
// same amount. The final size is rounded down to a whole number of regions.
//
// @param blockCacheConfig  BlockCache section of the Navy config
// @param blockCacheSize    bytes available to the block cache
// @param ioAlignSize       IO alignment; the region size must be a multiple
// @param metadataSize      bytes reserved for metadata at the device start
// @param usesRaidFiles     whether the device is backed by RAID files
// @param proto             CacheProto that receives the BlockCache proto
//
// @throw std::invalid_argument if the region size is not aligned to
//        @ioAlignSize
void setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,
                     uint64_t blockCacheSize,
                     uint32_t ioAlignSize,
                     uint64_t metadataSize,
                     bool usesRaidFiles,
                     cachelib::navy::CacheProto& proto) {
  const auto regionBytes = blockCacheConfig.getRegionSize();
  if (alignUp(regionBytes, ioAlignSize) != regionBytes) {
    throw std::invalid_argument(
        folly::sformat("Region size: {} is not aligned to ioAlignSize: {}",
                       regionBytes, ioAlignSize));
  }

  // Adjust starting size of block cache to ensure it is aligned to region
  // size which is what we use for the stripe size when using RAID0Device.
  uint64_t startOffset = metadataSize;
  if (usesRaidFiles) {
    const auto alignedStart = alignUp(startOffset, regionBytes);
    const auto lostBytes = alignedStart - startOffset;
    XDCHECK_LT(lostBytes, blockCacheSize);
    blockCacheSize -= lostBytes;
    startOffset = alignedStart;
  }
  blockCacheSize = alignDown(blockCacheSize, regionBytes);

  XLOG(INFO) << "blockcache: starting offset: " << startOffset
             << ", block cache size: " << blockCacheSize;

  auto blockCacheProto = cachelib::navy::createBlockCacheProto();
  blockCacheProto->setLayout(startOffset, blockCacheSize, regionBytes);
  blockCacheProto->setChecksum(blockCacheConfig.getDataChecksum());

  // Eviction policy: segmented FIFO when segment ratios are configured,
  // otherwise LRU if enabled, otherwise plain FIFO.
  auto sfifoRatio = blockCacheConfig.getSFifoSegmentRatio();
  if (sfifoRatio.empty()) {
    if (blockCacheConfig.isLruEnabled()) {
      blockCacheProto->setLruEvictionPolicy();
    } else {
      blockCacheProto->setFifoEvictionPolicy();
    }
  } else {
    blockCacheProto->setSegmentedFifoEvictionPolicy(std::move(sfifoRatio));
  }

  // Size classes are only passed on when the config lists any.
  auto configuredSizeClasses = blockCacheConfig.getSizeClasses();
  if (!configuredSizeClasses.empty()) {
    blockCacheProto->setSizeClasses(std::move(configuredSizeClasses));
  }

  blockCacheProto->setCleanRegionsPool(blockCacheConfig.getCleanRegions());

  // Reinsertion policy: each threshold is applied only when it is positive.
  const auto hitsThreshold = blockCacheConfig.getReinsertionHitsThreshold();
  if (hitsThreshold > 0) {
    blockCacheProto->setHitsReinsertionPolicy(hitsThreshold);
  }
  const auto pctThreshold = blockCacheConfig.getReinsertionPctThreshold();
  if (pctThreshold > 0) {
    blockCacheProto->setPercentageReinsertionPolicy(pctThreshold);
  }

  blockCacheProto->setNumInMemBuffers(blockCacheConfig.getNumInMemBuffers());

  proto.setBlockCache(std::move(blockCacheProto));
}
// Setup the CacheProto, which is the configuration interface of the Navy
// engine. Depending on the config this fills in the BigHashProto, the
// KangarooProto and the BlockCacheProto, which can then be used to create
// the corresponding engines.
//
// Device layout produced here: metadata at the start, BlockCache after the
// metadata, and the enabled small-item engine (BigHash / Kangaroo) at the end
// of the device. BlockCache only receives the space between the metadata and
// the first small-item engine region.
//
// NOTE(review): BigHash and Kangaroo each carve their region from the end of
// the device, so enabling both makes their regions overlap each other. This
// function does not reject that configuration; confirm whether it should.
//
// @param config   the configured NavyConfig
// @param device   the flash device
// @param proto    the output CacheProto
//
// @throw std::invalid_argument if input arguments are invalid
void setupCacheProtos(const navy::NavyConfig& config,
                      const navy::Device& device,
                      cachelib::navy::CacheProto& proto) {
  // Default metadata size: kDefaultMetadataPercent percent of the device,
  // rounded down to @alignment (which must be a power of two).
  auto getDefaultMetadataSize = [](size_t size, size_t alignment) {
    XDCHECK(folly::isPowTwo(alignment));
    auto mask = ~(alignment - 1);
    return (static_cast<size_t>(kDefaultMetadataPercent * size / 100) & mask);
  };

  auto ioAlignSize = device.getIOAlignmentSize();
  const uint64_t totalCacheSize = device.getSize();

  // A configured metadata size of 0 means "use the default".
  auto metadataSize = config.getDeviceMetadataSize();
  if (metadataSize == 0) {
    metadataSize = getDefaultMetadataSize(totalCacheSize, ioAlignSize);
  }
  metadataSize = alignUp(metadataSize, ioAlignSize);

  if (metadataSize >= totalCacheSize) {
    throw std::invalid_argument{
        folly::sformat("Invalid metadata size: {}. Cache size: {}",
                       metadataSize,
                       totalCacheSize)};
  }
  proto.setMetadataSize(metadataSize);

  // Start with everything after the metadata and shrink it for every
  // small-item engine that is enabled. A disabled engine must not reset the
  // size: doing so would let BlockCache overlap a region that was already
  // handed to the other engine.
  uint64_t blockCacheSize = totalCacheSize - metadataSize;

  // Set up BigHash if enabled
  if (config.isBigHashEnabled()) {
    const auto bigHashCacheOffset = setupBigHash(
        config.bigHash(), ioAlignSize, totalCacheSize, metadataSize, proto);
    blockCacheSize = bigHashCacheOffset - metadataSize;
  } else {
    XLOG(INFO) << "metadataSize: " << metadataSize << ". No bighash.";
  }

  // Set up Kangaroo if enabled
  if (config.isKangarooEnabled()) {
    const auto kangarooCacheOffset = setupKangaroo(
        config.kangaroo(), ioAlignSize, totalCacheSize, metadataSize, proto);
    const uint64_t spaceBeforeKangaroo = kangarooCacheOffset - metadataSize;
    // Keep the tighter bound so BlockCache stays clear of both engines.
    if (spaceBeforeKangaroo < blockCacheSize) {
      blockCacheSize = spaceBeforeKangaroo;
    }
  } else {
    XLOG(INFO) << "metadataSize: " << metadataSize << ". No kangaroo.";
  }

  // Set up BlockCache if enabled
  if (blockCacheSize > 0) {
    setupBlockCache(config.blockCache(), blockCacheSize, ioAlignSize,
                    metadataSize, config.usesRaidFiles(), proto);
  }
}
// Installs the admission policy named in @config on @proto.
//
// An empty policy name leaves @proto untouched. The recognised names are
// NavyConfig::kAdmPolicyRandom and NavyConfig::kAdmPolicyDynamicRandom.
//
// @throw std::invalid_argument if the policy name is not recognised
void setAdmissionPolicy(const cachelib::navy::NavyConfig& config,
                        cachelib::navy::CacheProto& proto) {
  const std::string& requestedPolicy = config.getAdmissionPolicy();

  // No policy configured: nothing to install.
  if (requestedPolicy.empty()) {
    return;
  }

  if (requestedPolicy == navy::NavyConfig::kAdmPolicyRandom) {
    proto.setRejectRandomAdmissionPolicy(config.randomAdmPolicy());
    return;
  }

  if (requestedPolicy == navy::NavyConfig::kAdmPolicyDynamicRandom) {
    proto.setDynamicRandomAdmissionPolicy(config.dynamicRandomAdmPolicy());
    return;
  }

  throw std::invalid_argument{
      folly::sformat("invalid policy name {}", requestedPolicy)};
}
// Creates the ordered thread pool job scheduler from the reader thread
// count, writer thread count and request ordering shards setting in @config.
std::unique_ptr<cachelib::navy::JobScheduler> createJobScheduler(
    const navy::NavyConfig& config) {
  const auto numReaders = config.getReaderThreads();
  const auto numWriters = config.getWriterThreads();
  const auto orderingShardsPower = config.getNavyReqOrderingShards();

  auto scheduler = cachelib::navy::createOrderedThreadPoolJobScheduler(
      numReaders, numWriters, orderingShardsPower);
  return scheduler;
}
} // namespace
// Creates the Navy device described by @config.
//
// Selection order:
//   1. RAID files  -> RAID device; the per-file size is rounded down to the
//                     RAID stripe size.
//   2. simple file -> file device.
//   3. otherwise   -> in-memory device of getFileSize() bytes.
//
// For the RAID and file devices a positive max device write size is rounded
// down to the block size; otherwise 0 is passed.
//
// @param config     the configured NavyConfig
// @param encryptor  device encryptor handed over to the created device
std::unique_ptr<cachelib::navy::Device> createDevice(
    const navy::NavyConfig& config,
    std::shared_ptr<navy::DeviceEncryptor> encryptor) {
  const auto blockSize = config.getBlockSize();
  const auto maxDeviceWriteSize = config.getDeviceMaxWriteSize();

  // Evaluated lazily: the in-memory device never needs this value.
  const auto alignedMaxWriteSize = [&]() {
    return maxDeviceWriteSize > 0 ? alignDown(maxDeviceWriteSize, blockSize)
                                  : 0;
  };

  if (config.usesRaidFiles()) {
    const auto stripeSize = config.getRaidStripeSize();
    return cachelib::navy::createRAIDDevice(
        config.getRaidPaths(),
        alignDown(config.getFileSize(), stripeSize),
        config.getTruncateFile(),
        blockSize,
        stripeSize,
        std::move(encryptor),
        alignedMaxWriteSize());
  }

  if (config.usesSimpleFile()) {
    return cachelib::navy::createFileDevice(config.getFileName(),
                                            config.getFileSize(),
                                            config.getTruncateFile(),
                                            blockSize,
                                            std::move(encryptor),
                                            alignedMaxWriteSize());
  }

  return cachelib::navy::createMemoryDevice(config.getFileSize(),
                                            std::move(encryptor), blockSize);
}
// Creates a Navy cache from @config.
//
// The device, job scheduler, insert/parcel limits, admission policy,
// destructor callback and engine protos are set on a CacheProto, from which
// the cache is created. Afterwards the cache is either reset (when
// @truncate is set) or recovery is attempted; a failed recovery is only
// logged and the cache is still returned.
//
// @param config     the configured NavyConfig
// @param cb         destructor callback installed on the cache proto
// @param truncate   if true, reset the cache instead of attempting recovery
// @param encryptor  device encryptor handed to the device
//
// @throw std::invalid_argument if the configuration is rejected while the
//        engine protos or the admission policy are being set up
std::unique_ptr<navy::AbstractCache> createNavyCache(
    const navy::NavyConfig& config,
    navy::DestructorCallback cb,
    bool truncate,
    std::shared_ptr<navy::DeviceEncryptor> encryptor) {
  auto navyDevice = createDevice(config, std::move(encryptor));
  // Keep a non-owning pointer: ownership moves into the proto below, but the
  // device is still needed to lay out the engines.
  auto* const rawDevice = navyDevice.get();

  auto cacheProto = cachelib::navy::createCacheProto();
  cacheProto->setDevice(std::move(navyDevice));
  cacheProto->setJobScheduler(createJobScheduler(config));
  cacheProto->setMaxConcurrentInserts(config.getMaxConcurrentInserts());
  cacheProto->setMaxParcelMemory(
      megabytesToBytes(config.getMaxParcelMemoryMB()));
  setAdmissionPolicy(config, *cacheProto);
  cacheProto->setDestructorCallback(cb);

  setupCacheProtos(config, *rawDevice, *cacheProto);

  auto navyCache = createCache(std::move(cacheProto));
  XDCHECK(navyCache != nullptr);

  if (truncate) {
    navyCache->reset();
  } else if (!navyCache->recover()) {
    XLOG(WARN) << "No recovery data found. Continuing with clean cache.";
  }
  return navyCache;
}
} // namespace cachelib
} // namespace facebook