Skip to content

Commit 40f1b33

Browse files
committed
Adding RAID 0 support for fdp path
Summary: This enables RAID0 in the FDP I/O path by splitting I/O across all devices. Signed-off-by: Vikash Kumar <[email protected]>
1 parent 78233f0 commit 40f1b33

File tree

1 file changed

+61
-56
lines changed

1 file changed

+61
-56
lines changed

cachelib/navy/common/Device.cpp

Lines changed: 61 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -220,12 +220,13 @@ class CompletionHandler : public folly::EventHandler {
220220
// Per-thread context for AsyncIO like libaio or io_uring
221221
class AsyncIoContext : public IoContext {
222222
public:
223-
AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
224-
size_t id,
225-
folly::EventBase* evb,
226-
size_t capacity,
227-
bool useIoUring,
228-
std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec);
223+
AsyncIoContext(
224+
std::unique_ptr<folly::AsyncBase>&& asyncBase,
225+
size_t id,
226+
folly::EventBase* evb,
227+
size_t capacity,
228+
bool useIoUring,
229+
const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs);
229230

230231
~AsyncIoContext() override = default;
231232

@@ -279,18 +280,16 @@ class AsyncIoContext : public IoContext {
279280
size_t numSubmitted_ = 0;
280281
size_t numCompleted_ = 0;
281282

282-
// Device info vector for FDP support
283-
const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
284-
// As of now, only one FDP enabled Device is supported
285-
static constexpr uint16_t kDefaultFdpIdx = 0u;
283+
// Map of file descriptors to FdpNvme device objects
284+
const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs_;
286285
};
287286

288287
// An FileDevice manages direct I/O to either a single or multiple (RAID0)
289288
// block device(s) or regular file(s).
290289
class FileDevice : public Device {
291290
public:
292291
FileDevice(std::vector<folly::File>&& fvec,
293-
std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
292+
std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
294293
uint64_t size,
295294
uint32_t blockSize,
296295
uint32_t stripeSize,
@@ -317,8 +316,8 @@ class FileDevice : public Device {
317316
// File vector for devices or regular files
318317
const std::vector<folly::File> fvec_{};
319318

320-
// Device info vector for FDP support
321-
const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
319+
// Map of file descriptors to FdpNvme device objects
320+
const std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs_;
322321

323322
// RAID stripe size when multiple devices are used
324323
const uint32_t stripeSize_;
@@ -750,20 +749,21 @@ bool SyncIoContext::submitIo(IOOp& op) {
750749
/*
751750
* AsyncIoContext
752751
*/
753-
AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
754-
size_t id,
755-
folly::EventBase* evb,
756-
size_t capacity,
757-
bool useIoUring,
758-
std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec)
752+
AsyncIoContext::AsyncIoContext(
753+
std::unique_ptr<folly::AsyncBase>&& asyncBase,
754+
size_t id,
755+
folly::EventBase* evb,
756+
size_t capacity,
757+
bool useIoUring,
758+
const std::unordered_map<int, std::shared_ptr<FdpNvme>>& fdpNvmeDevs)
759759
: asyncBase_(std::move(asyncBase)),
760760
id_(id),
761761
qDepth_(capacity),
762762
useIoUring_(useIoUring),
763-
fdpNvmeVec_(fdpNvmeVec) {
763+
fdpNvmeDevs_(fdpNvmeDevs) {
764764
#ifdef CACHELIB_IOURING_DISABLE
765765
// io_uring is not available on the system
766-
XDCHECK(!useIoUring_ && !(fdpNvmeVec_.size() > 0));
766+
XDCHECK(!useIoUring_ && !(fdpNvmeDevs_.size() > 0));
767767
useIoUring_ = false;
768768
#endif
769769
if (evb) {
@@ -781,7 +781,7 @@ AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
781781
"[{}] Created new async io context with qdepth {}{} io_engine {} {}",
782782
getName(), qDepth_, qDepth_ == 1 ? " (sync wait)" : "",
783783
useIoUring_ ? "io_uring" : "libaio",
784-
(fdpNvmeVec_.size() > 0) ? "FDP enabled" : "");
784+
(fdpNvmeDevs_.size() > 0) ? "FDP enabled" : "");
785785
}
786786

787787
void AsyncIoContext::pollCompletion() {
@@ -820,7 +820,7 @@ void AsyncIoContext::handleCompletion(
820820
}
821821

822822
auto len = aop->result();
823-
if (fdpNvmeVec_.size() > 0) {
823+
if (fdpNvmeDevs_.size() > 0) {
824824
// 0 means success here, so get the completed size from iop
825825
len = !len ? iop->size_ : 0;
826826
}
@@ -869,7 +869,7 @@ bool AsyncIoContext::submitIo(IOOp& op) {
869869
}
870870

871871
std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepAsyncIo(IOOp& op) {
872-
if (fdpNvmeVec_.size() > 0) {
872+
if (fdpNvmeDevs_.size() > 0) {
873873
return prepNvmeIo(op);
874874
}
875875

@@ -905,10 +905,10 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
905905
iouringCmdOp->initBase();
906906
struct io_uring_sqe& sqe = iouringCmdOp->getSqe();
907907
if (req.opType_ == OpType::READ) {
908-
fdpNvmeVec_[kDefaultFdpIdx]->prepReadUringCmdSqe(sqe, op.data_, op.size_,
909-
op.offset_);
908+
fdpNvmeDevs_.at(op.fd_)->prepReadUringCmdSqe(sqe, op.data_, op.size_,
909+
op.offset_);
910910
} else {
911-
fdpNvmeVec_[kDefaultFdpIdx]->prepWriteUringCmdSqe(
911+
fdpNvmeDevs_.at(op.fd_)->prepWriteUringCmdSqe(
912912
sqe, op.data_, op.size_, op.offset_, op.placeHandle_.value_or(-1));
913913
}
914914
io_uring_sqe_set_data(&sqe, iouringCmdOp.get());
@@ -921,23 +921,24 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
921921
/*
922922
* FileDevice
923923
*/
924-
FileDevice::FileDevice(std::vector<folly::File>&& fvec,
925-
std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
926-
uint64_t fileSize,
927-
uint32_t blockSize,
928-
uint32_t stripeSize,
929-
uint32_t maxIOSize,
930-
uint32_t maxDeviceWriteSize,
931-
IoEngine ioEngine,
932-
uint32_t qDepthPerContext,
933-
std::shared_ptr<DeviceEncryptor> encryptor)
924+
FileDevice::FileDevice(
925+
std::vector<folly::File>&& fvec,
926+
std::unordered_map<int, std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
927+
uint64_t fileSize,
928+
uint32_t blockSize,
929+
uint32_t stripeSize,
930+
uint32_t maxIOSize,
931+
uint32_t maxDeviceWriteSize,
932+
IoEngine ioEngine,
933+
uint32_t qDepthPerContext,
934+
std::shared_ptr<DeviceEncryptor> encryptor)
934935
: Device(fileSize * fvec.size(),
935936
std::move(encryptor),
936937
blockSize,
937938
maxIOSize,
938939
maxDeviceWriteSize),
939940
fvec_(std::move(fvec)),
940-
fdpNvmeVec_(std::move(fdpNvmeVec)),
941+
fdpNvmeDevs_(std::move(fdpNvmeDevs)),
941942
stripeSize_(stripeSize),
942943
ioEngine_(ioEngine),
943944
qDepthPerContext_(qDepthPerContext) {
@@ -974,7 +975,7 @@ FileDevice::FileDevice(std::vector<folly::File>&& fvec,
974975
"num_fdp_devices {}",
975976
fvec_.size(), getSize(), blockSize, stripeSize, maxDeviceWriteSize,
976977
maxIOSize, getIoEngineName(ioEngine_), qDepthPerContext_,
977-
fdpNvmeVec_.size());
978+
fdpNvmeDevs_.size());
978979
}
979980

980981
bool FileDevice::readImpl(uint64_t offset, uint32_t size, void* value) {
@@ -1030,7 +1031,7 @@ IoContext* FileDevice::getIoContext() {
10301031
std::unique_ptr<folly::AsyncBase> asyncBase;
10311032
if (useIoUring) {
10321033
#ifndef CACHELIB_IOURING_DISABLE
1033-
if (fdpNvmeVec_.size() > 0) {
1034+
if (fdpNvmeDevs_.size() > 0) {
10341035
// Big sqe/cqe is mandatory for NVMe passthrough
10351036
// https://elixir.bootlin.com/linux/v6.7/source/drivers/nvme/host/ioctl.c#L742
10361037
folly::IoUringOp::Options options;
@@ -1051,7 +1052,7 @@ IoContext* FileDevice::getIoContext() {
10511052
auto idx = incrementalIdx_++;
10521053
tlContext_.reset(new AsyncIoContext(std::move(asyncBase), idx, evb,
10531054
qDepthPerContext_, useIoUring,
1054-
fdpNvmeVec_));
1055+
fdpNvmeDevs_));
10551056

10561057
{
10571058
// Keep pointers in a vector to ease the gdb debugging
@@ -1067,10 +1068,20 @@ IoContext* FileDevice::getIoContext() {
10671068
}
10681069

10691070
int FileDevice::allocatePlacementHandle() {
1070-
static constexpr uint16_t kDefaultFdpIdx = 0u;
10711071
#ifndef CACHELIB_IOURING_DISABLE
1072-
if (fdpNvmeVec_.size() > 0) {
1073-
return fdpNvmeVec_[kDefaultFdpIdx]->allocateFdpHandle();
1072+
if (fdpNvmeDevs_.size() > 0) {
1073+
auto fdpHandle = -1;
1074+
// Allocate a placement handle on every FdpNvme device so that all RAID
1075+
// members share the same FDP handle. Return that handle on success, or -1
1076+
// as soon as a device returns a handle that differs from the previous one.
1077+
for (auto& nvmeFdp : fdpNvmeDevs_) {
1078+
auto tempHandle = nvmeFdp.second->allocateFdpHandle();
1079+
if (fdpHandle != -1 && (tempHandle != fdpHandle)) {
1080+
return -1;
1081+
}
1082+
fdpHandle = tempHandle;
1083+
}
1084+
return fdpHandle;
10741085
}
10751086
#endif
10761087
return -1;
@@ -1186,31 +1197,25 @@ std::unique_ptr<Device> createDirectIoFileDevice(
11861197
XDCHECK(folly::isPowTwo(blockSize));
11871198

11881199
uint32_t maxIOSize = maxDeviceWriteSize;
1189-
std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec{};
1200+
std::unordered_map<int, std::shared_ptr<FdpNvme>> fdpNvmeDevs;
11901201
#ifndef CACHELIB_IOURING_DISABLE
11911202
if (isFDPEnabled) {
11921203
try {
1193-
if (filePaths.size() > 1) {
1194-
throw std::invalid_argument(folly::sformat(
1195-
"{} input files; but FDP mode does not support RAID files yet",
1196-
filePaths.size()));
1197-
}
1198-
1199-
for (const auto& path : filePaths) {
1200-
auto fdpNvme = std::make_shared<FdpNvme>(path);
1204+
for (size_t i = 0; i < filePaths.size(); i++) {
1205+
auto fdpNvme = std::make_shared<FdpNvme>(filePaths[i]);
12011206

12021207
auto maxDevIOSize = fdpNvme->getMaxIOSize();
12031208
if (maxDevIOSize != 0u &&
12041209
(maxIOSize == 0u || maxDevIOSize < maxIOSize)) {
12051210
maxIOSize = maxDevIOSize;
12061211
}
12071212

1208-
fdpNvmeVec.push_back(std::move(fdpNvme));
1213+
fdpNvmeDevs.insert({fVec[i].fd(), std::move(fdpNvme)});
12091214
}
12101215
} catch (const std::exception& e) {
12111216
XLOGF(ERR, "NVMe FDP mode could not be enabled {}, Errno: {}", e.what(),
12121217
errno);
1213-
fdpNvmeVec.clear();
1218+
fdpNvmeDevs.clear();
12141219
maxIOSize = 0u;
12151220
}
12161221
}
@@ -1221,7 +1226,7 @@ std::unique_ptr<Device> createDirectIoFileDevice(
12211226
}
12221227

12231228
return std::make_unique<FileDevice>(std::move(fVec),
1224-
std::move(fdpNvmeVec),
1229+
std::move(fdpNvmeDevs),
12251230
fileSize,
12261231
blockSize,
12271232
stripeSize,

0 commit comments

Comments
 (0)