@@ -220,12 +220,13 @@ class CompletionHandler : public folly::EventHandler {
220
220
// Per-thread context for AsyncIO like libaio or io_uring
221
221
class AsyncIoContext : public IoContext {
222
222
public:
223
- AsyncIoContext (std::unique_ptr<folly::AsyncBase>&& asyncBase,
224
- size_t id,
225
- folly::EventBase* evb,
226
- size_t capacity,
227
- bool useIoUring,
228
- std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec);
223
+ AsyncIoContext (
224
+ std::unique_ptr<folly::AsyncBase>&& asyncBase,
225
+ size_t id,
226
+ folly::EventBase* evb,
227
+ size_t capacity,
228
+ bool useIoUring,
229
+ const std::unordered_map<int , std::shared_ptr<FdpNvme>>& fdpNvmeDevs);
229
230
230
231
~AsyncIoContext () override = default ;
231
232
@@ -279,18 +280,16 @@ class AsyncIoContext : public IoContext {
279
280
size_t numSubmitted_ = 0 ;
280
281
size_t numCompleted_ = 0 ;
281
282
282
- // Device info vector for FDP support
283
- const std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec_{};
284
- // As of now, only one FDP enabled Device is supported
285
- static constexpr uint16_t kDefaultFdpIdx = 0u ;
283
+ // Map of file descriptors to FdpNvme device objects
284
+ const std::unordered_map<int , std::shared_ptr<FdpNvme>>& fdpNvmeDevs_;
286
285
};
287
286
288
287
// A FileDevice manages direct I/O to either a single or multiple (RAID0)
289
288
// block device(s) or regular file(s).
290
289
class FileDevice : public Device {
291
290
public:
292
291
FileDevice (std::vector<folly::File>&& fvec,
293
- std::vector< std::shared_ptr<FdpNvme>>&& fdpNvmeVec ,
292
+ std::unordered_map< int , std::shared_ptr<FdpNvme>>&& fdpNvmeDevs ,
294
293
uint64_t size,
295
294
uint32_t blockSize,
296
295
uint32_t stripeSize,
@@ -317,8 +316,8 @@ class FileDevice : public Device {
317
316
// File vector for devices or regular files
318
317
const std::vector<folly::File> fvec_{};
319
318
320
- // Device info vector for FDP support
321
- const std::vector< std::shared_ptr<FdpNvme>> fdpNvmeVec_{} ;
319
+ // Map of file descriptors to FdpNvme device objects
320
+ const std::unordered_map< int , std::shared_ptr<FdpNvme>> fdpNvmeDevs_ ;
322
321
323
322
// RAID stripe size when multiple devices are used
324
323
const uint32_t stripeSize_;
@@ -750,20 +749,21 @@ bool SyncIoContext::submitIo(IOOp& op) {
750
749
/*
751
750
* AsyncIoContext
752
751
*/
753
- AsyncIoContext::AsyncIoContext (std::unique_ptr<folly::AsyncBase>&& asyncBase,
754
- size_t id,
755
- folly::EventBase* evb,
756
- size_t capacity,
757
- bool useIoUring,
758
- std::vector<std::shared_ptr<FdpNvme>> fdpNvmeVec)
752
+ AsyncIoContext::AsyncIoContext (
753
+ std::unique_ptr<folly::AsyncBase>&& asyncBase,
754
+ size_t id,
755
+ folly::EventBase* evb,
756
+ size_t capacity,
757
+ bool useIoUring,
758
+ const std::unordered_map<int , std::shared_ptr<FdpNvme>>& fdpNvmeDevs)
759
759
: asyncBase_(std::move(asyncBase)),
760
760
id_(id),
761
761
qDepth_(capacity),
762
762
useIoUring_(useIoUring),
763
- fdpNvmeVec_(fdpNvmeVec ) {
763
+ fdpNvmeDevs_(fdpNvmeDevs ) {
764
764
#ifdef CACHELIB_IOURING_DISABLE
765
765
// io_uring is not available on the system
766
- XDCHECK (!useIoUring_ && !(fdpNvmeVec_ .size () > 0 ));
766
+ XDCHECK (!useIoUring_ && !(fdpNvmeDevs_ .size () > 0 ));
767
767
useIoUring_ = false ;
768
768
#endif
769
769
if (evb) {
@@ -781,7 +781,7 @@ AsyncIoContext::AsyncIoContext(std::unique_ptr<folly::AsyncBase>&& asyncBase,
781
781
" [{}] Created new async io context with qdepth {}{} io_engine {} {}" ,
782
782
getName (), qDepth_, qDepth_ == 1 ? " (sync wait)" : " " ,
783
783
useIoUring_ ? " io_uring" : " libaio" ,
784
- (fdpNvmeVec_ .size () > 0 ) ? " FDP enabled" : " " );
784
+ (fdpNvmeDevs_ .size () > 0 ) ? " FDP enabled" : " " );
785
785
}
786
786
787
787
void AsyncIoContext::pollCompletion () {
@@ -820,7 +820,7 @@ void AsyncIoContext::handleCompletion(
820
820
}
821
821
822
822
auto len = aop->result ();
823
- if (fdpNvmeVec_ .size () > 0 ) {
823
+ if (fdpNvmeDevs_ .size () > 0 ) {
824
824
// 0 means success here, so get the completed size from iop
825
825
len = !len ? iop->size_ : 0 ;
826
826
}
@@ -869,7 +869,7 @@ bool AsyncIoContext::submitIo(IOOp& op) {
869
869
}
870
870
871
871
std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepAsyncIo (IOOp& op) {
872
- if (fdpNvmeVec_ .size () > 0 ) {
872
+ if (fdpNvmeDevs_ .size () > 0 ) {
873
873
return prepNvmeIo (op);
874
874
}
875
875
@@ -905,10 +905,10 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
905
905
iouringCmdOp->initBase ();
906
906
struct io_uring_sqe & sqe = iouringCmdOp->getSqe ();
907
907
if (req.opType_ == OpType::READ) {
908
- fdpNvmeVec_[ kDefaultFdpIdx ] ->prepReadUringCmdSqe (sqe, op.data_ , op.size_ ,
909
- op.offset_ );
908
+ fdpNvmeDevs_. at (op. fd_ ) ->prepReadUringCmdSqe (sqe, op.data_ , op.size_ ,
909
+ op.offset_ );
910
910
} else {
911
- fdpNvmeVec_[ kDefaultFdpIdx ] ->prepWriteUringCmdSqe (
911
+ fdpNvmeDevs_. at (op. fd_ ) ->prepWriteUringCmdSqe (
912
912
sqe, op.data_ , op.size_ , op.offset_ , op.placeHandle_ .value_or (-1 ));
913
913
}
914
914
io_uring_sqe_set_data (&sqe, iouringCmdOp.get ());
@@ -921,23 +921,24 @@ std::unique_ptr<folly::AsyncBaseOp> AsyncIoContext::prepNvmeIo(IOOp& op) {
921
921
/*
922
922
* FileDevice
923
923
*/
924
- FileDevice::FileDevice (std::vector<folly::File>&& fvec,
925
- std::vector<std::shared_ptr<FdpNvme>>&& fdpNvmeVec,
926
- uint64_t fileSize,
927
- uint32_t blockSize,
928
- uint32_t stripeSize,
929
- uint32_t maxIOSize,
930
- uint32_t maxDeviceWriteSize,
931
- IoEngine ioEngine,
932
- uint32_t qDepthPerContext,
933
- std::shared_ptr<DeviceEncryptor> encryptor)
924
+ FileDevice::FileDevice (
925
+ std::vector<folly::File>&& fvec,
926
+ std::unordered_map<int , std::shared_ptr<FdpNvme>>&& fdpNvmeDevs,
927
+ uint64_t fileSize,
928
+ uint32_t blockSize,
929
+ uint32_t stripeSize,
930
+ uint32_t maxIOSize,
931
+ uint32_t maxDeviceWriteSize,
932
+ IoEngine ioEngine,
933
+ uint32_t qDepthPerContext,
934
+ std::shared_ptr<DeviceEncryptor> encryptor)
934
935
: Device(fileSize * fvec.size(),
935
936
std::move(encryptor),
936
937
blockSize,
937
938
maxIOSize,
938
939
maxDeviceWriteSize),
939
940
fvec_(std::move(fvec)),
940
- fdpNvmeVec_ (std::move(fdpNvmeVec )),
941
+ fdpNvmeDevs_ (std::move(fdpNvmeDevs )),
941
942
stripeSize_(stripeSize),
942
943
ioEngine_(ioEngine),
943
944
qDepthPerContext_(qDepthPerContext) {
@@ -974,7 +975,7 @@ FileDevice::FileDevice(std::vector<folly::File>&& fvec,
974
975
" num_fdp_devices {}" ,
975
976
fvec_.size (), getSize (), blockSize, stripeSize, maxDeviceWriteSize,
976
977
maxIOSize, getIoEngineName (ioEngine_), qDepthPerContext_,
977
- fdpNvmeVec_ .size ());
978
+ fdpNvmeDevs_ .size ());
978
979
}
979
980
980
981
bool FileDevice::readImpl (uint64_t offset, uint32_t size, void * value) {
@@ -1030,7 +1031,7 @@ IoContext* FileDevice::getIoContext() {
1030
1031
std::unique_ptr<folly::AsyncBase> asyncBase;
1031
1032
if (useIoUring) {
1032
1033
#ifndef CACHELIB_IOURING_DISABLE
1033
- if (fdpNvmeVec_ .size () > 0 ) {
1034
+ if (fdpNvmeDevs_ .size () > 0 ) {
1034
1035
// Big sqe/cqe is mandatory for NVMe passthrough
1035
1036
// https://elixir.bootlin.com/linux/v6.7/source/drivers/nvme/host/ioctl.c#L742
1036
1037
folly::IoUringOp::Options options;
@@ -1051,7 +1052,7 @@ IoContext* FileDevice::getIoContext() {
1051
1052
auto idx = incrementalIdx_++;
1052
1053
tlContext_.reset (new AsyncIoContext (std::move (asyncBase), idx, evb,
1053
1054
qDepthPerContext_, useIoUring,
1054
- fdpNvmeVec_ ));
1055
+ fdpNvmeDevs_ ));
1055
1056
1056
1057
{
1057
1058
// Keep pointers in a vector to ease gdb debugging
@@ -1067,10 +1068,20 @@ IoContext* FileDevice::getIoContext() {
1067
1068
}
1068
1069
1069
1070
int FileDevice::allocatePlacementHandle () {
1070
- static constexpr uint16_t kDefaultFdpIdx = 0u ;
1071
1071
#ifndef CACHELIB_IOURING_DISABLE
1072
- if (fdpNvmeVec_.size () > 0 ) {
1073
- return fdpNvmeVec_[kDefaultFdpIdx ]->allocateFdpHandle ();
1072
+ if (fdpNvmeDevs_.size () > 0 ) {
1073
+ auto fdpHandle = -1 ;
1074
+ // Allocate a placement handle on every FdpNvme device (RAID case) and
1075
+ // require that the handles match. Returns the common handle on success,
1076
+ // or -1 if a device's handle differs from the previously allocated one.
1077
+ for (auto & nvmeFdp : fdpNvmeDevs_) {
1078
+ auto tempHandle = nvmeFdp.second ->allocateFdpHandle ();
1079
+ if (fdpHandle != -1 && (tempHandle != fdpHandle)) {
1080
+ return -1 ;
1081
+ }
1082
+ fdpHandle = tempHandle;
1083
+ }
1084
+ return fdpHandle;
1074
1085
}
1075
1086
#endif
1076
1087
return -1 ;
@@ -1186,31 +1197,25 @@ std::unique_ptr<Device> createDirectIoFileDevice(
1186
1197
XDCHECK (folly::isPowTwo (blockSize));
1187
1198
1188
1199
uint32_t maxIOSize = maxDeviceWriteSize;
1189
- std::vector< std::shared_ptr<FdpNvme>> fdpNvmeVec{} ;
1200
+ std::unordered_map< int , std::shared_ptr<FdpNvme>> fdpNvmeDevs ;
1190
1201
#ifndef CACHELIB_IOURING_DISABLE
1191
1202
if (isFDPEnabled) {
1192
1203
try {
1193
- if (filePaths.size () > 1 ) {
1194
- throw std::invalid_argument (folly::sformat (
1195
- " {} input files; but FDP mode does not support RAID files yet" ,
1196
- filePaths.size ()));
1197
- }
1198
-
1199
- for (const auto & path : filePaths) {
1200
- auto fdpNvme = std::make_shared<FdpNvme>(path);
1204
+ for (size_t i = 0 ; i < filePaths.size (); i++) {
1205
+ auto fdpNvme = std::make_shared<FdpNvme>(filePaths[i]);
1201
1206
1202
1207
auto maxDevIOSize = fdpNvme->getMaxIOSize ();
1203
1208
if (maxDevIOSize != 0u &&
1204
1209
(maxIOSize == 0u || maxDevIOSize < maxIOSize)) {
1205
1210
maxIOSize = maxDevIOSize;
1206
1211
}
1207
1212
1208
- fdpNvmeVec. push_back ( std::move (fdpNvme));
1213
+ fdpNvmeDevs. insert ({ fVec [i]. fd (), std::move (fdpNvme)} );
1209
1214
}
1210
1215
} catch (const std::exception & e) {
1211
1216
XLOGF (ERR, " NVMe FDP mode could not be enabled {}, Errno: {}" , e.what (),
1212
1217
errno);
1213
- fdpNvmeVec .clear ();
1218
+ fdpNvmeDevs .clear ();
1214
1219
maxIOSize = 0u ;
1215
1220
}
1216
1221
}
@@ -1221,7 +1226,7 @@ std::unique_ptr<Device> createDirectIoFileDevice(
1221
1226
}
1222
1227
1223
1228
return std::make_unique<FileDevice>(std::move (fVec ),
1224
- std::move (fdpNvmeVec ),
1229
+ std::move (fdpNvmeDevs ),
1225
1230
fileSize,
1226
1231
blockSize,
1227
1232
stripeSize,
0 commit comments