-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[Offload] Make olLaunchKernel test thread safe #149497
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
8d9adc6
f3fa27b
d81df98
d757534
0d2371a
fbced13
cd7c283
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -206,7 +206,7 @@ Error initPlugins(OffloadContext &Context) { | |
} | ||
|
||
Error olInit_impl() { | ||
std::lock_guard<std::mutex> Lock{OffloadContextValMutex}; | ||
std::lock_guard<std::mutex> Lock(OffloadContextValMutex); | ||
|
||
if (isOffloadInitialized()) { | ||
OffloadContext::get().RefCount++; | ||
|
@@ -224,7 +224,7 @@ Error olInit_impl() { | |
} | ||
|
||
Error olShutDown_impl() { | ||
std::lock_guard<std::mutex> Lock{OffloadContextValMutex}; | ||
std::lock_guard<std::mutex> Lock(OffloadContextValMutex); | ||
|
||
if (--OffloadContext::get().RefCount != 0) | ||
return Error::success(); | ||
|
@@ -487,16 +487,12 @@ Error olSyncQueue_impl(ol_queue_handle_t Queue) { | |
// Host plugin doesn't have a queue set so it's not safe to call synchronize | ||
// on it, but we have nothing to synchronize in that situation anyway. | ||
if (Queue->AsyncInfo->Queue) { | ||
if (auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo)) | ||
// We don't need to release the queue and we would like the ability for | ||
// other offload threads to submit work concurrently, so pass "false" here. | ||
if (auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo, false)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please indicate with a comment what's the [inline code missing from extraction — presumably the `false` argument]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code assumes other threads will not release the queue from that async info, right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Correct, although as far as I know, liboffload doesn't do that, and that feels reasonable as a thing to mark as undefined. |
||
return Err; | ||
} | ||
|
||
// Recreate the stream resource so the queue can be reused | ||
// TODO: Would be easier for the synchronization to (optionally) not release | ||
// it to begin with. | ||
if (auto Res = Queue->Device->Device->initAsyncInfo(&Queue->AsyncInfo)) | ||
return Res; | ||
|
||
return Error::success(); | ||
} | ||
|
||
|
@@ -727,7 +723,7 @@ Error olGetSymbol_impl(ol_program_handle_t Program, const char *Name, | |
ol_symbol_kind_t Kind, ol_symbol_handle_t *Symbol) { | ||
auto &Device = Program->Image->getDevice(); | ||
|
||
std::lock_guard<std::mutex> Lock{Program->SymbolListMutex}; | ||
std::lock_guard<std::mutex> Lock(Program->SymbolListMutex); | ||
|
||
switch (Kind) { | ||
case OL_SYMBOL_KIND_KERNEL: { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,7 @@ struct GenericPluginTy; | |
struct GenericKernelTy; | ||
struct GenericDeviceTy; | ||
struct RecordReplayTy; | ||
template <typename ResourceRef> class GenericDeviceResourceManagerTy; | ||
|
||
/// Class that wraps the __tgt_async_info to simplify its usage. In case the | ||
/// object is constructed without a valid __tgt_async_info, the object will use | ||
|
@@ -93,6 +94,20 @@ struct AsyncInfoWrapperTy { | |
AsyncInfoPtr->Queue = Queue; | ||
} | ||
|
||
/// Get the queue, using the provided resource manager to initialise it if it | ||
/// doesn't exist. | ||
template <typename Ty, typename RMTy> | ||
Expected<Ty> | ||
getOrInitQueue(GenericDeviceResourceManagerTy<RMTy> &ResourceManager) { | ||
std::lock_guard<std::mutex> Lock(AsyncInfoPtr->Mutex); | ||
if (!AsyncInfoPtr->Queue) { | ||
if (auto Err = ResourceManager.getResource( | ||
*reinterpret_cast<Ty *>(&AsyncInfoPtr->Queue))) | ||
return Err; | ||
} | ||
return getQueueAs<Ty>(); | ||
} | ||
|
||
/// Synchronize with the __tgt_async_info's pending operations if it's the | ||
/// internal async info. The error associated to the asynchronous operations | ||
/// issued in this queue must be provided in \p Err. This function will update | ||
|
@@ -104,6 +119,7 @@ struct AsyncInfoWrapperTy { | |
/// Register \p Ptr as an associated allocation that is freed after | ||
/// finalization. | ||
void freeAllocationAfterSynchronization(void *Ptr) { | ||
std::lock_guard<std::mutex> AllocationGuard{AsyncInfoPtr->Mutex}; | ||
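There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Still some instances with the [inline code missing from extraction — likely the `{}` brace initialisation used on the preceding line] |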
||
AsyncInfoPtr->AssociatedAllocations.push_back(Ptr); | ||
} | ||
|
||
|
@@ -793,9 +809,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy { | |
Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image); | ||
|
||
/// Synchronize the current thread with the pending operations on the | ||
/// __tgt_async_info structure. | ||
Error synchronize(__tgt_async_info *AsyncInfo); | ||
virtual Error synchronizeImpl(__tgt_async_info &AsyncInfo) = 0; | ||
/// __tgt_async_info structure. If ReleaseQueue is false, then the | ||
/// underlying queue will not be released. In this case, additional | ||
/// work may be submitted to the queue whilst a synchronize is running. | ||
Error synchronize(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true); | ||
virtual Error synchronizeImpl(__tgt_async_info &AsyncInfo, | ||
bool ReleaseQueue) = 0; | ||
|
||
/// Invokes any global constructors on the device if present and is required | ||
/// by the target. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -522,16 +522,11 @@ struct CUDADeviceTy : public GenericDeviceTy { | |
|
||
/// Get the stream of the asynchronous info structure or get a new one. | ||
Error getStream(AsyncInfoWrapperTy &AsyncInfoWrapper, CUstream &Stream) { | ||
// Get the stream (if any) from the async info. | ||
Stream = AsyncInfoWrapper.getQueueAs<CUstream>(); | ||
if (!Stream) { | ||
// There was no stream; get an idle one. | ||
if (auto Err = CUDAStreamManager.getResource(Stream)) | ||
return Err; | ||
|
||
// Modify the async info's stream. | ||
AsyncInfoWrapper.setQueueAs<CUstream>(Stream); | ||
} | ||
auto WrapperStream = | ||
AsyncInfoWrapper.getOrInitQueue<CUstream>(CUDAStreamManager); | ||
if (!WrapperStream) | ||
return WrapperStream.takeError(); | ||
Stream = *WrapperStream; | ||
return Plugin::success(); | ||
} | ||
|
||
|
@@ -642,17 +637,20 @@ struct CUDADeviceTy : public GenericDeviceTy { | |
} | ||
|
||
/// Synchronize current thread with the pending operations on the async info. | ||
Error synchronizeImpl(__tgt_async_info &AsyncInfo) override { | ||
Error synchronizeImpl(__tgt_async_info &AsyncInfo, | ||
bool ReleaseQueue) override { | ||
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue); | ||
CUresult Res; | ||
Res = cuStreamSynchronize(Stream); | ||
|
||
// Once the stream is synchronized, return it to stream pool and reset | ||
// AsyncInfo. This is to make sure the synchronization only works for its | ||
// own tasks. | ||
AsyncInfo.Queue = nullptr; | ||
if (auto Err = CUDAStreamManager.returnResource(Stream)) | ||
return Err; | ||
// Once the stream is synchronized and we want to release the queue, return | ||
// it to stream pool and reset AsyncInfo. This is to make sure the | ||
// synchronization only works for its own tasks. | ||
if (ReleaseQueue) { | ||
AsyncInfo.Queue = nullptr; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When does the queue get unset/released for liboffload queues? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When the device is de-inited, all streams in the stream manager are deinited and dropped. For liboffload specifically, since devices are not cleared, this happens during the final liboffload [inline code missing from extraction — presumably the shutdown call]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But shouldn't the queue be released when [remainder of the comment missing from extraction] |
||
if (auto Err = CUDAStreamManager.returnResource(Stream)) | ||
return Err; | ||
} | ||
|
||
return Plugin::check(Res, "error in cuStreamSynchronize: %s"); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,6 +104,29 @@ TEST_P(olLaunchKernelFooTest, Success) { | |
ASSERT_SUCCESS(olMemFree(Mem)); | ||
} | ||
|
||
TEST_P(olLaunchKernelFooTest, SuccessThreaded) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd love to be able to add an [remainder of the comment missing from extraction] |
||
threadify([&](size_t) { | ||
void *Mem; | ||
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, | ||
LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem)); | ||
struct { | ||
void *Mem; | ||
} Args{Mem}; | ||
|
||
ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), | ||
&LaunchArgs, nullptr)); | ||
|
||
ASSERT_SUCCESS(olWaitQueue(Queue)); | ||
|
||
uint32_t *Data = (uint32_t *)Mem; | ||
for (uint32_t i = 0; i < 64; i++) { | ||
ASSERT_EQ(Data[i], i); | ||
} | ||
|
||
ASSERT_SUCCESS(olMemFree(Mem)); | ||
}); | ||
} | ||
|
||
TEST_P(olLaunchKernelNoArgsTest, Success) { | ||
ASSERT_SUCCESS( | ||
olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs)); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this actually ignored for libomptarget? I think you're unconditionally acquiring the mutex.