Skip to content

[NFC][SYCL] Accept queue_impl by raw ptr/ref in MemoryManager #18712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
// Get or allocate the USM memory associated with the device global.
DeviceGlobalUSMMem &DeviceGlobalUSM =
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(QueueImpl);
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(*QueueImpl);

// If the device global still has a initialization event it should be
// added to the initialization events list. Since initialization events
Expand Down
8 changes: 4 additions & 4 deletions sycl/source/detail/device_global_map_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(const AdapterPtr &Adapter) {
return OwnedUrEvent(Adapter);
}

DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(
const std::shared_ptr<queue_impl> &QueueImpl) {
DeviceGlobalUSMMem &
DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
assert(!MIsDeviceImageScopeDecorated &&
"USM allocations should not be acquired for device_global with "
"device_image_scope property.");
const std::shared_ptr<context_impl> &CtxImpl = QueueImpl->getContextImplPtr();
const device_impl &DevImpl = QueueImpl->getDeviceImpl();
const std::shared_ptr<context_impl> &CtxImpl = QueueImpl.getContextImplPtr();
const device_impl &DevImpl = QueueImpl.getDeviceImpl();
std::lock_guard<std::mutex> Lock(MDeviceToUSMPtrMapMutex);

auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()});
Expand Down
3 changes: 1 addition & 2 deletions sycl/source/detail/device_global_map_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,7 @@ struct DeviceGlobalMapEntry {
}

// Gets or allocates USM memory for a device_global.
DeviceGlobalUSMMem &
getOrAllocateDeviceGlobalUSM(const std::shared_ptr<queue_impl> &QueueImpl);
DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl);

// Removes resources for device_globals associated with the context.
void removeAssociatedResources(const context_impl *CtxImpl);
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ class kernel_bundle_impl {
// of using a throw-away queue.
queue InitQueue{MContext, Dev};
auto &USMMem =
Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue));
Entry->getOrAllocateDeviceGlobalUSM(*getSyclObjImpl(InitQueue));
InitQueue.wait_and_throw();
return USMMem.getPtr();
}
Expand Down
263 changes: 112 additions & 151 deletions sycl/source/detail/memory_manager.cpp

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions sycl/source/detail/memory_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,80 +107,80 @@ class MemoryManager {

// Copies memory between: host and device, host and host,
// device and device if memory objects bound to the one context.
static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue,
static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, queue_impl *SrcQueue,
unsigned int DimSrc, sycl::range<3> SrcSize,
sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset,
unsigned int SrcElemSize, void *DstMem,
QueueImplPtr TgtQueue, unsigned int DimDst,
sycl::range<3> DstSize, sycl::range<3> DstAccessRange,
sycl::id<3> DstOffset, unsigned int DstElemSize,
unsigned int SrcElemSize, void *DstMem, queue_impl *TgtQueue,
unsigned int DimDst, sycl::range<3> DstSize,
sycl::range<3> DstAccessRange, sycl::id<3> DstOffset,
unsigned int DstElemSize,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t &OutEvent);

static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
size_t PatternSize, const unsigned char *Pattern,
unsigned int Dim, sycl::range<3> Size,
sycl::range<3> AccessRange, sycl::id<3> AccessOffset,
unsigned int ElementSize,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t &OutEvent);

static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
access::mode AccessMode, unsigned int Dim,
sycl::range<3> Size, sycl::range<3> AccessRange,
sycl::id<3> AccessOffset, unsigned int ElementSize,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t &OutEvent);

static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue,
static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue,
void *MappedPtr, std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t &OutEvent);

static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len,
static void copy_usm(const void *SrcMem, queue_impl &Queue, size_t Len,
void *DstMem, std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len,
static void fill_usm(void *DstMem, queue_impl &Queue, size_t Len,
const std::vector<unsigned char> &Pattern,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len,
static void prefetch_usm(void *Ptr, queue_impl &Queue, size_t Len,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len,
static void advise_usm(const void *Ptr, queue_impl &Queue, size_t Len,
ur_usm_advice_flags_t Advice,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void copy_2d_usm(const void *SrcMem, size_t SrcPitch,
QueueImplPtr Queue, void *DstMem, size_t DstPitch,
queue_impl &Queue, void *DstMem, size_t DstPitch,
size_t Width, size_t Height,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch,
static void fill_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch,
size_t Width, size_t Height,
const std::vector<unsigned char> &Pattern,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch,
static void memset_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch,
size_t Width, size_t Height, char Value,
std::vector<ur_event_handle_t> DepEvents,
ur_event_handle_t *OutEvent);

static void
copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped,
QueueImplPtr Queue, size_t NumBytes, size_t Offset,
queue_impl &Queue, size_t NumBytes, size_t Offset,
const void *SrcMem,
const std::vector<ur_event_handle_t> &DepEvents,
ur_event_handle_t *OutEvent);

static void
copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped,
QueueImplPtr Queue, size_t NumBytes, size_t Offset,
queue_impl &Queue, size_t NumBytes, size_t Offset,
void *DstMem,
const std::vector<ur_event_handle_t> &DepEvents,
ur_event_handle_t *OutEvent);
Expand Down Expand Up @@ -254,7 +254,7 @@ class MemoryManager {
ur_exp_command_buffer_sync_point_t *OutSyncPoint);

static void copy_image_bindless(
QueueImplPtr Queue, const void *Src, void *Dst,
queue_impl &Queue, const void *Src, void *Dst,
const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc,
const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat,
const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset,
Expand Down
18 changes: 10 additions & 8 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
return submitMemOpHelper(
Self, DepEvents, CallerNeedsEvent,
[&](handler &CGH) { CGH.memset(Ptr, Value, Count); },
MemoryManager::fill_usm, Ptr, Self, Count, Pattern);
MemoryManager::fill_usm, Ptr, *this, Count, Pattern);
}

void report(const code_location &CodeLoc) {
Expand Down Expand Up @@ -233,7 +233,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
return submitMemOpHelper(
Self, DepEvents, CallerNeedsEvent,
[&](handler &CGH) { CGH.memcpy(Dest, Src, Count); },
MemoryManager::copy_usm, Src, Self, Count, Dest);
MemoryManager::copy_usm, Src, *this, Count, Dest);
}

event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
Expand All @@ -244,7 +244,7 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
return submitMemOpHelper(
Self, DepEvents, CallerNeedsEvent,
[&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); },
MemoryManager::advise_usm, Ptr, Self, Length, Advice);
MemoryManager::advise_usm, Ptr, *this, Length, Advice);
}

event queue_impl::memcpyToDeviceGlobal(
Expand All @@ -258,7 +258,7 @@ event queue_impl::memcpyToDeviceGlobal(
NumBytes, Offset);
},
MemoryManager::copy_to_device_global, DeviceGlobalPtr, IsDeviceImageScope,
Self, NumBytes, Offset, Src);
*this, NumBytes, Offset, Src);
}

event queue_impl::memcpyFromDeviceGlobal(
Expand All @@ -272,7 +272,7 @@ event queue_impl::memcpyFromDeviceGlobal(
NumBytes, Offset);
},
MemoryManager::copy_from_device_global, DeviceGlobalPtr,
IsDeviceImageScope, Self, NumBytes, Offset, Dest);
IsDeviceImageScope, *this, NumBytes, Offset, Dest);
}

sycl::detail::optional<event>
Expand Down Expand Up @@ -449,7 +449,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
bool CallerNeedsEvent,
HandlerFuncT HandlerFunc,
MemOpFuncT MemOpFunc,
MemOpArgTs... MemOpArgs) {
MemOpArgTs &&...MemOpArgs) {
// We need to submit command and update the last event under same lock if we
// have in-order queue.
{
Expand All @@ -468,7 +468,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
auto isNoEventsMode = trySwitchingToNoEventsMode();
if (!CallerNeedsEvent && isNoEventsMode) {
NestedCallsTracker tracker;
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...,
getUrEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr);

return createDiscardedEvent();
Expand All @@ -480,7 +481,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
NestedCallsTracker tracker;
ur_event_handle_t UREvent = nullptr;
EventImpl->setSubmissionTime();
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent);
MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...,
getUrEvents(ExpandedDepEvents), &UREvent);
EventImpl->setHandle(UREvent);
EventImpl->setEnqueued();
// connect returned event with dependent events
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,8 @@ class queue_impl {
event submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
const std::vector<event> &DepEvents,
bool CallerNeedsEvent, HandlerFuncT HandlerFunc,
MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs);
MemMngrFuncT MemMngrFunc,
MemMngrArgTs &&...MemOpArgs);

// When instrumentation is enabled emits trace event for wait begin and
// returns the telemetry event generated for the wait
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ addCounterInit(handler &CGH, std::shared_ptr<sycl::detail::queue_impl> &Queue,
EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context()));
EventImpl->setStateIncomplete();
ur_event_handle_t UREvent = nullptr;
MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &UREvent);
MemoryManager::fill_usm(Counter.get(), *Queue, sizeof(int), {0}, {},
&UREvent);
EventImpl->setHandle(UREvent);
CGH.depends_on(createSyclObjFromImpl<event>(EventImpl));
}
Expand Down
Loading