diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 5e8e98ca231bb..9fe103986f913 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -427,7 +427,7 @@ std::vector context_impl::initializeDeviceGlobals( for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) { // Get or allocate the USM memory associated with the device global. DeviceGlobalUSMMem &DeviceGlobalUSM = - DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(QueueImpl); + DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(*QueueImpl); // If the device global still has a initialization event it should be // added to the initialization events list. Since initialization events diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index a6101e918d677..f2bc2d5b998fd 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -41,13 +41,13 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(const AdapterPtr &Adapter) { return OwnedUrEvent(Adapter); } -DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( - const std::shared_ptr &QueueImpl) { +DeviceGlobalUSMMem & +DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { assert(!MIsDeviceImageScopeDecorated && "USM allocations should not be acquired for device_global with " "device_image_scope property."); - const std::shared_ptr &CtxImpl = QueueImpl->getContextImplPtr(); - const device_impl &DevImpl = QueueImpl->getDeviceImpl(); + const std::shared_ptr &CtxImpl = QueueImpl.getContextImplPtr(); + const device_impl &DevImpl = QueueImpl.getDeviceImpl(); std::lock_guard Lock(MDeviceToUSMPtrMapMutex); auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()}); diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index fc4bae13161ed..10d122c967677 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -109,8 +109,7 @@ struct DeviceGlobalMapEntry { } // Gets or allocates USM memory for a device_global. - DeviceGlobalUSMMem & - getOrAllocateDeviceGlobalUSM(const std::shared_ptr &QueueImpl); + DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl); // Removes resources for device_globals associated with the context. void removeAssociatedResources(const context_impl *CtxImpl); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index eb0340fb938b7..5f54d75b860ad 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -669,7 +669,7 @@ class kernel_bundle_impl { // of using a throw-away queue. queue InitQueue{MContext, Dev}; auto &USMMem = - Entry->getOrAllocateDeviceGlobalUSM(getSyclObjImpl(InitQueue)); + Entry->getOrAllocateDeviceGlobalUSM(*getSyclObjImpl(InitQueue)); InitQueue.wait_and_throw(); return USMMem.getPtr(); } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index a650fb59fcc26..800da83f73760 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -494,20 +494,19 @@ void prepTermPositions(TermPositions &pos, int Dimensions, } } -void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, +void copyH2D(queue_impl &TgtQueue, SYCLMemObjI *SYCLMemObj, char *SrcMem, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, unsigned int SrcElemSize, ur_mem_handle_t DstMem, - QueueImplPtr TgtQueue, unsigned int DimDst, sycl::range<3> DstSize, + unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, std::vector DepEvents, ur_event_handle_t &OutEvent) { (void)SrcAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(TgtQueue && "Destination mem object queue must be not nullptr"); - const ur_queue_handle_t Queue = TgtQueue->getHandleRef(); - const AdapterPtr &Adapter = TgtQueue->getAdapter(); + const ur_queue_handle_t Queue = TgtQueue.getHandleRef(); + const AdapterPtr &Adapter = TgtQueue.getAdapter(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); TermPositions SrcPos, DstPos; @@ -564,20 +563,19 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, } } -void copyD2H(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, - sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, char *DstMem, QueueImplPtr, +void copyD2H(queue_impl &SrcQueue, SYCLMemObjI *SYCLMemObj, + ur_mem_handle_t SrcMem, unsigned int DimSrc, + sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, + sycl::id<3> SrcOffset, unsigned int SrcElemSize, char *DstMem, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, std::vector DepEvents, ur_event_handle_t &OutEvent) { (void)DstAccessRange; assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && "Source mem object queue is expected to be not nullptr"); - const ur_queue_handle_t Queue = SrcQueue->getHandleRef(); - const AdapterPtr &Adapter = SrcQueue->getAdapter(); + const ur_queue_handle_t Queue = SrcQueue.getHandleRef(); + const AdapterPtr &Adapter = SrcQueue.getAdapter(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); TermPositions SrcPos, DstPos; @@ -639,20 +637,20 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, } } -void copyD2D(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, - sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, ur_mem_handle_t DstMem, QueueImplPtr, - unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3>, - sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector DepEvents, +// Only when memory objects are bound to the same context, so one queue_impl is +// all we need. +void copyD2D(queue_impl &SrcQueue, SYCLMemObjI *SYCLMemObj, + ur_mem_handle_t SrcMem, unsigned int DimSrc, + sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, + sycl::id<3> SrcOffset, unsigned int SrcElemSize, + ur_mem_handle_t DstMem, unsigned int DimDst, + sycl::range<3> DstSize, sycl::range<3>, sycl::id<3> DstOffset, + unsigned int DstElemSize, std::vector DepEvents, ur_event_handle_t &OutEvent) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(SrcQueue && "Source mem object and target mem object queues are " - "expected to be not nullptr"); - const ur_queue_handle_t Queue = SrcQueue->getHandleRef(); - const AdapterPtr &Adapter = SrcQueue->getAdapter(); + const ur_queue_handle_t Queue = SrcQueue.getHandleRef(); + const AdapterPtr &Adapter = SrcQueue.getAdapter(); detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType(); TermPositions SrcPos, DstPos; @@ -710,11 +708,10 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, ur_mem_handle_t SrcMem, } } -static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, - unsigned int DimSrc, sycl::range<3> SrcSize, - sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, char *DstMem, QueueImplPtr, - unsigned int DimDst, sycl::range<3> DstSize, +static void copyH2H(SYCLMemObjI *, char *SrcMem, unsigned int DimSrc, + sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, + sycl::id<3> SrcOffset, unsigned int SrcElemSize, + char *DstMem, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, std::vector, ur_event_handle_t &) { @@ -739,10 +736,10 @@ static void copyH2H(SYCLMemObjI *, char *SrcMem, QueueImplPtr, // Copies memory between: host and device, host and host, // device and device if memory objects bound to the one context. void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, + queue_impl *SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, unsigned int SrcElemSize, - void *DstMem, QueueImplPtr TgtQueue, + void *DstMem, queue_impl *TgtQueue, unsigned int DimDst, sycl::range<3> DstSize, sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, unsigned int DstElemSize, @@ -751,33 +748,32 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, if (!SrcQueue) { if (!TgtQueue) - copyH2H(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, - SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, nullptr, - DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, - std::move(DepEvents), OutEvent); + copyH2H(SYCLMemObj, (char *)SrcMem, DimSrc, SrcSize, SrcAccessRange, + SrcOffset, SrcElemSize, (char *)DstMem, DimDst, DstSize, + DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), + OutEvent); else - copyH2D(SYCLMemObj, (char *)SrcMem, nullptr, DimSrc, SrcSize, + copyH2D(*TgtQueue, SYCLMemObj, (char *)SrcMem, DimSrc, SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, - ur::cast(DstMem), std::move(TgtQueue), DimDst, - DstSize, DstAccessRange, DstOffset, DstElemSize, - std::move(DepEvents), OutEvent); + ur::cast(DstMem), DimDst, DstSize, + DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), + OutEvent); } else { if (!TgtQueue) - copyD2H(SYCLMemObj, ur::cast(SrcMem), - std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, - SrcElemSize, (char *)DstMem, nullptr, DimDst, DstSize, + copyD2H(*SrcQueue, SYCLMemObj, ur::cast(SrcMem), DimSrc, + SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, (char *)DstMem, + DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, + std::move(DepEvents), OutEvent); + else + copyD2D(*SrcQueue, SYCLMemObj, ur::cast(SrcMem), DimSrc, + SrcSize, SrcAccessRange, SrcOffset, SrcElemSize, + ur::cast(DstMem), DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, std::move(DepEvents), OutEvent); - else - copyD2D(SYCLMemObj, ur::cast(SrcMem), - std::move(SrcQueue), DimSrc, SrcSize, SrcAccessRange, SrcOffset, - SrcElemSize, ur::cast(DstMem), - std::move(TgtQueue), DimDst, DstSize, DstAccessRange, DstOffset, - DstElemSize, std::move(DepEvents), OutEvent); } } -void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, +void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue, size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> MemRange, sycl::range<3> AccRange, sycl::id<3> Offset, @@ -785,9 +781,8 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, std::vector DepEvents, ur_event_handle_t &OutEvent) { assert(SYCLMemObj && "The SYCLMemObj is nullptr"); - assert(Queue && "Fill should be called only with a valid device queue"); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); if (SYCLMemObj->getType() == detail::SYCLMemObjI::MemObjType::Buffer) { @@ -801,7 +796,7 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, if (RangesUsable && OffsetUsable) { Adapter->call( - Queue->getHandleRef(), ur::cast(Mem), Pattern, + Queue.getHandleRef(), ur::cast(Mem), Pattern, PatternSize, Offset[0] * ElementSize, RangeMultiplier * ElementSize, DepEvents.size(), DepEvents.data(), &OutEvent); return; @@ -819,17 +814,12 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, } } -void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, +void *MemoryManager::map(SYCLMemObjI *, void *Mem, queue_impl &Queue, access::mode AccessMode, unsigned int, sycl::range<3>, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, std::vector DepEvents, ur_event_handle_t &OutEvent) { - if (!Queue) { - throw exception(make_error_code(errc::runtime), - "Not supported configuration of map requested"); - } - ur_map_flags_t Flags = 0; switch (AccessMode) { @@ -857,42 +847,36 @@ void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, void *MappedPtr = nullptr; const size_t BytesToMap = AccessRange[0] * AccessRange[1] * AccessRange[2]; - const AdapterPtr &Adapter = Queue->getAdapter(); - memBufferMapHelper(Adapter, Queue->getHandleRef(), + const AdapterPtr &Adapter = Queue.getAdapter(); + memBufferMapHelper(Adapter, Queue.getHandleRef(), ur::cast(Mem), false, Flags, AccessOffset[0], BytesToMap, DepEvents.size(), DepEvents.data(), &OutEvent, &MappedPtr); return MappedPtr; } -void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, +void MemoryManager::unmap(SYCLMemObjI *, void *Mem, queue_impl &Queue, void *MappedPtr, std::vector DepEvents, ur_event_handle_t &OutEvent) { - - // Execution on host is not supported here. - if (!Queue) { - throw exception(make_error_code(errc::runtime), - "Not supported configuration of unmap requested"); - } // All DepEvents are to the same Context. // Using the adapter of the Queue. - const AdapterPtr &Adapter = Queue->getAdapter(); - memUnmapHelper(Adapter, Queue->getHandleRef(), ur::cast(Mem), + const AdapterPtr &Adapter = Queue.getAdapter(); + memUnmapHelper(Adapter, Queue.getHandleRef(), ur::cast(Mem), MappedPtr, DepEvents.size(), DepEvents.data(), &OutEvent); } -void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, +void MemoryManager::copy_usm(const void *SrcMem, queue_impl &SrcQueue, size_t Len, void *DstMem, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(SrcQueue && "USM copy must be called with a valid device queue"); + const AdapterPtr &Adapter = SrcQueue.getAdapter(); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { - SrcQueue->getAdapter()->call( - SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(), - OutEvent); + Adapter->call(SrcQueue.getHandleRef(), + DepEvents.size(), + DepEvents.data(), OutEvent); } return; } @@ -901,22 +885,20 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, throw exception(make_error_code(errc::invalid), "NULL pointer argument in memory copy operation."); - const AdapterPtr &Adapter = SrcQueue->getAdapter(); - Adapter->call(SrcQueue->getHandleRef(), + Adapter->call(SrcQueue.getHandleRef(), /* blocking */ false, DstMem, SrcMem, Len, DepEvents.size(), DepEvents.data(), OutEvent); } -void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, +void MemoryManager::fill_usm(void *Mem, queue_impl &Queue, size_t Length, const std::vector &Pattern, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && "USM fill must be called with a valid device queue"); if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { - Queue->getAdapter()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue.getAdapter()->call( + Queue.getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -924,43 +906,40 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, if (!Mem) throw exception(make_error_code(errc::invalid), "NULL pointer argument in memory fill operation."); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); Adapter->call( - Queue->getHandleRef(), Mem, Pattern.size(), Pattern.data(), Length, + Queue.getHandleRef(), Mem, Pattern.size(), Pattern.data(), Length, DepEvents.size(), DepEvents.data(), OutEvent); } -void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, +void MemoryManager::prefetch_usm(void *Mem, queue_impl &Queue, size_t Length, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && "USM prefetch must be called with a valid device queue"); - const AdapterPtr &Adapter = Queue->getAdapter(); - Adapter->call(Queue->getHandleRef(), Mem, + const AdapterPtr &Adapter = Queue.getAdapter(); + Adapter->call(Queue.getHandleRef(), Mem, Length, 0, DepEvents.size(), DepEvents.data(), OutEvent); } -void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, +void MemoryManager::advise_usm(const void *Mem, queue_impl &Queue, size_t Length, ur_usm_advice_flags_t Advice, std::vector /*DepEvents*/, ur_event_handle_t *OutEvent) { - assert(Queue && "USM advise must be called with a valid device queue"); - const AdapterPtr &Adapter = Queue->getAdapter(); - Adapter->call(Queue->getHandleRef(), Mem, + const AdapterPtr &Adapter = Queue.getAdapter(); + Adapter->call(Queue.getHandleRef(), Mem, Length, Advice, OutEvent); } void MemoryManager::copy_2d_usm(const void *SrcMem, size_t SrcPitch, - QueueImplPtr Queue, void *DstMem, + queue_impl &Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && "USM copy 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { - Queue->getAdapter()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue.getAdapter()->call( + Queue.getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -969,18 +948,18 @@ void MemoryManager::copy_2d_usm(const void *SrcMem, size_t SrcPitch, throw sycl::exception(sycl::make_error_code(errc::invalid), "NULL pointer argument in 2D memory copy operation."); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); bool SupportsUSMMemcpy2D = false; Adapter->call( - Queue->getContextImplPtr()->getHandleRef(), + Queue.getContextImplPtr()->getHandleRef(), UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, sizeof(bool), &SupportsUSMMemcpy2D, nullptr); if (SupportsUSMMemcpy2D) { // Direct memcpy2D is supported so we use this function. Adapter->call( - Queue->getHandleRef(), + Queue.getHandleRef(), /*blocking=*/false, DstMem, DstPitch, SrcMem, SrcPitch, Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); return; @@ -988,7 +967,7 @@ void MemoryManager::copy_2d_usm(const void *SrcMem, size_t SrcPitch, // Otherwise we allow the special case where the copy is to or from host. #ifndef NDEBUG - context Ctx = createSyclObjFromImpl(Queue->getContextImplPtr()); + context Ctx = createSyclObjFromImpl(Queue.getContextImplPtr()); usm::alloc SrcAllocType = get_pointer_type(SrcMem, Ctx); usm::alloc DstAllocType = get_pointer_type(DstMem, Ctx); bool SrcIsHost = @@ -1009,28 +988,27 @@ void MemoryManager::copy_2d_usm(const void *SrcMem, size_t SrcPitch, char *DstItBegin = static_cast(DstMem) + I * DstPitch; const char *SrcItBegin = static_cast(SrcMem) + I * SrcPitch; Adapter->call( - Queue->getHandleRef(), + Queue.getHandleRef(), /* blocking */ false, DstItBegin, SrcItBegin, Width, DepEvents.size(), DepEvents.data(), CopyEvents.data() + I); CopyEventsManaged.emplace_back(CopyEvents[I], Adapter, /*TakeOwnership=*/true); } // Then insert a wait to coalesce the copy events. - Queue->getAdapter()->call( - Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent); + Queue.getAdapter()->call( + Queue.getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent); } -void MemoryManager::fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, +void MemoryManager::fill_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && "USM fill 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { - Queue->getAdapter()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue.getAdapter()->call( + Queue.getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -1038,23 +1016,21 @@ void MemoryManager::fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, if (!DstMem) throw sycl::exception(sycl::make_error_code(errc::invalid), "NULL pointer argument in 2D memory fill operation."); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); Adapter->call( - Queue->getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(), + Queue.getHandleRef(), DstMem, Pitch, Pattern.size(), Pattern.data(), Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); } -void MemoryManager::memset_2d_usm(void *DstMem, QueueImplPtr Queue, - size_t Pitch, size_t Width, size_t Height, - char Value, +void MemoryManager::memset_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch, + size_t Width, size_t Height, char Value, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && "USM memset 2d must be called with a valid device queue"); if (Width == 0 || Height == 0) { // no-op, but ensure DepEvents will still be waited on if (!DepEvents.empty()) { - Queue->getAdapter()->call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); + Queue.getAdapter()->call( + Queue.getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -1069,13 +1045,11 @@ void MemoryManager::memset_2d_usm(void *DstMem, QueueImplPtr Queue, } static void -memcpyToDeviceGlobalUSM(QueueImplPtr Queue, +memcpyToDeviceGlobalUSM(queue_impl &Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && - "Copy to device global USM must be called with a valid device queue"); // Get or allocate USM memory for the device_global. DeviceGlobalUSMMem &DeviceGlobalUSM = DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(Queue); @@ -1083,7 +1057,7 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, // OwnedPiEvent will keep the initialization event alive for the duration // of this function call. - OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getAdapter()); + OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue.getAdapter()); // We may need addtional events, so create a non-const dependency events list // to use if we need to modify it. @@ -1103,14 +1077,10 @@ memcpyToDeviceGlobalUSM(QueueImplPtr Queue, ActualDepEvents, OutEvent); } -static void -memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, - DeviceGlobalMapEntry *DeviceGlobalEntry, - size_t NumBytes, size_t Offset, void *Dest, - const std::vector &DepEvents, - ur_event_handle_t *OutEvent) { - assert(Queue && "Copying from device global USM must be called with a valid " - "device queue"); +static void memcpyFromDeviceGlobalUSM( + queue_impl &Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, + size_t Offset, void *Dest, const std::vector &DepEvents, + ur_event_handle_t *OutEvent) { // Get or allocate USM memory for the device_global. Since we are reading from // it, we need it initialized if it has not been yet. DeviceGlobalUSMMem &DeviceGlobalUSM = @@ -1119,7 +1089,7 @@ memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, // OwnedPiEvent will keep the initialization event alive for the duration // of this function call. - OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue->getAdapter()); + OwnedUrEvent ZIEvent = DeviceGlobalUSM.getInitEvent(Queue.getAdapter()); // We may need addtional events, so create a non-const dependency events list // to use if we need to modify it. @@ -1139,7 +1109,7 @@ memcpyFromDeviceGlobalUSM(QueueImplPtr Queue, } static ur_program_handle_t -getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, +getOrBuildProgramForDeviceGlobal(queue_impl &Queue, DeviceGlobalMapEntry *DeviceGlobalEntry) { assert(DeviceGlobalEntry->MIsDeviceImageScopeDecorated && "device_global is not device image scope decorated."); @@ -1155,8 +1125,8 @@ getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, "No image exists with the device_global."); // Look for cached programs with the device_global. - device Device = Queue->get_device(); - ContextImplPtr ContextImpl = Queue->getContextImplPtr(); + device Device = Queue.get_device(); + ContextImplPtr ContextImpl = Queue.getContextImplPtr(); std::optional CachedProgram = ContextImpl->getProgramForDeviceGlobal(Device, DeviceGlobalEntry); if (CachedProgram) @@ -1175,42 +1145,35 @@ getOrBuildProgramForDeviceGlobal(QueueImplPtr Queue, } static void -memcpyToDeviceGlobalDirect(QueueImplPtr Queue, +memcpyToDeviceGlobalDirect(queue_impl &Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, size_t Offset, const void *Src, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { - assert( - Queue && - "Direct copy to device global must be called with a valid device queue"); ur_program_handle_t Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); Adapter->call( - Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), + Queue.getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), false, NumBytes, Offset, Src, DepEvents.size(), DepEvents.data(), OutEvent); } -static void -memcpyFromDeviceGlobalDirect(QueueImplPtr Queue, - DeviceGlobalMapEntry *DeviceGlobalEntry, - size_t NumBytes, size_t Offset, void *Dest, - const std::vector &DepEvents, - ur_event_handle_t *OutEvent) { - assert(Queue && "Direct copy from device global must be called with a valid " - "device queue"); +static void memcpyFromDeviceGlobalDirect( + queue_impl &Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, size_t NumBytes, + size_t Offset, void *Dest, const std::vector &DepEvents, + ur_event_handle_t *OutEvent) { ur_program_handle_t Program = getOrBuildProgramForDeviceGlobal(Queue, DeviceGlobalEntry); - const AdapterPtr &Adapter = Queue->getAdapter(); + const AdapterPtr &Adapter = Queue.getAdapter(); Adapter->call( - Queue->getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), + Queue.getHandleRef(), Program, DeviceGlobalEntry->MUniqueId.c_str(), false, NumBytes, Offset, Dest, DepEvents.size(), DepEvents.data(), OutEvent); } void MemoryManager::copy_to_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, + const void *DeviceGlobalPtr, bool IsDeviceImageScoped, queue_impl &Queue, size_t NumBytes, size_t Offset, const void *SrcMem, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { @@ -1232,7 +1195,7 @@ void MemoryManager::copy_to_device_global( } void MemoryManager::copy_from_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, + const void *DeviceGlobalPtr, bool IsDeviceImageScoped, queue_impl &Queue, size_t NumBytes, size_t Offset, void *DstMem, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { @@ -1583,15 +1546,13 @@ void MemoryManager::ext_oneapi_advise_usm_cmd_buffer( } void MemoryManager::copy_image_bindless( - QueueImplPtr Queue, const void *Src, void *Dst, + queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, ur_rect_offset_t DstOffset, ur_rect_region_t CopyExtent, const std::vector &DepEvents, ur_event_handle_t *OutEvent) { - assert(Queue && - "Copy image bindless must be called with a valid device queue"); assert((Flags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE || Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST || Flags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE || @@ -1602,7 +1563,7 @@ void MemoryManager::copy_image_bindless( sycl::make_error_code(errc::invalid), "NULL pointer argument in bindless image copy operation."); - const detail::AdapterPtr &Adapter = Queue->getAdapter(); + const detail::AdapterPtr &Adapter = Queue.getAdapter(); ur_exp_image_copy_region_t CopyRegion{}; CopyRegion.stype = UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION; @@ -1611,7 +1572,7 @@ void MemoryManager::copy_image_bindless( CopyRegion.dstOffset = DstOffset; Adapter->call( - Queue->getHandleRef(), Src, Dst, &SrcDesc, &DstDesc, &SrcFormat, + Queue.getHandleRef(), Src, Dst, &SrcDesc, &DstDesc, &SrcFormat, &DstFormat, &CopyRegion, Flags, DepEvents.size(), DepEvents.data(), OutEvent); } diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 9e28fa735c2df..adb68f9f7e421 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -107,17 +107,17 @@ class MemoryManager { // Copies memory between: host and device, host and host, // device and device if memory objects bound to the one context. - static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue, + static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, queue_impl *SrcQueue, unsigned int DimSrc, sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, void *DstMem, - QueueImplPtr TgtQueue, unsigned int DimDst, - sycl::range<3> DstSize, sycl::range<3> DstAccessRange, - sycl::id<3> DstOffset, unsigned int DstElemSize, + unsigned int SrcElemSize, void *DstMem, queue_impl *TgtQueue, + unsigned int DimDst, sycl::range<3> DstSize, + sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, + unsigned int DstElemSize, std::vector DepEvents, ur_event_handle_t &OutEvent); - static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, + static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue, size_t PatternSize, const unsigned char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, @@ -125,62 +125,62 @@ class MemoryManager { std::vector DepEvents, ur_event_handle_t &OutEvent); - static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, + static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue, access::mode AccessMode, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, unsigned int ElementSize, std::vector DepEvents, ur_event_handle_t &OutEvent); - static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, + static void unmap(SYCLMemObjI *SYCLMemObj, void *Mem, queue_impl &Queue, void *MappedPtr, std::vector DepEvents, ur_event_handle_t &OutEvent); - static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, + static void copy_usm(const void *SrcMem, queue_impl &Queue, size_t Len, void *DstMem, std::vector DepEvents, ur_event_handle_t *OutEvent); - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, + static void fill_usm(void *DstMem, queue_impl &Queue, size_t Len, const std::vector &Pattern, std::vector DepEvents, ur_event_handle_t *OutEvent); - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, + static void prefetch_usm(void *Ptr, queue_impl &Queue, size_t Len, std::vector DepEvents, ur_event_handle_t *OutEvent); - static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, + static void advise_usm(const void *Ptr, queue_impl &Queue, size_t Len, ur_usm_advice_flags_t Advice, std::vector DepEvents, ur_event_handle_t *OutEvent); static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, - QueueImplPtr Queue, void *DstMem, size_t DstPitch, + queue_impl &Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, std::vector DepEvents, ur_event_handle_t *OutEvent); - static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, + static void fill_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, std::vector DepEvents, ur_event_handle_t *OutEvent); - static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, + static void memset_2d_usm(void *DstMem, queue_impl &Queue, size_t Pitch, size_t Width, size_t Height, char Value, std::vector DepEvents, ur_event_handle_t *OutEvent); static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, - QueueImplPtr Queue, size_t NumBytes, size_t Offset, + queue_impl &Queue, size_t NumBytes, size_t Offset, const void *SrcMem, const std::vector &DepEvents, ur_event_handle_t *OutEvent); static void copy_from_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, - QueueImplPtr Queue, size_t NumBytes, size_t Offset, + queue_impl &Queue, size_t NumBytes, size_t Offset, void *DstMem, const std::vector &DepEvents, ur_event_handle_t *OutEvent); @@ -254,7 +254,7 @@ class MemoryManager { ur_exp_command_buffer_sync_point_t *OutSyncPoint); static void copy_image_bindless( - QueueImplPtr Queue, const void *Src, void *Dst, + queue_impl &Queue, const void *Src, void *Dst, const ur_image_desc_t &SrcDesc, const ur_image_desc_t &DstDesc, const ur_image_format_t &SrcFormat, const ur_image_format_t &DstFormat, const ur_exp_image_copy_flags_t Flags, ur_rect_offset_t SrcOffset, diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 16051062e8e37..fb3b711c01855 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -182,7 +182,7 @@ event queue_impl::memset(const std::shared_ptr &Self, return submitMemOpHelper( Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, - MemoryManager::fill_usm, Ptr, Self, Count, Pattern); + MemoryManager::fill_usm, Ptr, *this, Count, Pattern); } void report(const code_location &CodeLoc) { @@ -233,7 +233,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, return submitMemOpHelper( Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, - MemoryManager::copy_usm, Src, Self, Count, Dest); + MemoryManager::copy_usm, Src, *this, Count, Dest); } event queue_impl::mem_advise(const std::shared_ptr &Self, @@ -244,7 +244,7 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, return submitMemOpHelper( Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, - MemoryManager::advise_usm, Ptr, Self, Length, Advice); + MemoryManager::advise_usm, Ptr, *this, Length, Advice); } event queue_impl::memcpyToDeviceGlobal( @@ -258,7 +258,7 @@ event queue_impl::memcpyToDeviceGlobal( NumBytes, Offset); }, MemoryManager::copy_to_device_global, DeviceGlobalPtr, IsDeviceImageScope, - Self, NumBytes, Offset, Src); + *this, NumBytes, Offset, Src); } event queue_impl::memcpyFromDeviceGlobal( @@ -272,7 +272,7 @@ event queue_impl::memcpyFromDeviceGlobal( NumBytes, Offset); }, MemoryManager::copy_from_device_global, DeviceGlobalPtr, - IsDeviceImageScope, Self, NumBytes, Offset, Dest); + IsDeviceImageScope, *this, NumBytes, Offset, Dest); } sycl::detail::optional @@ -449,7 +449,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemOpFuncT MemOpFunc, - MemOpArgTs... MemOpArgs) { + MemOpArgTs &&...MemOpArgs) { // We need to submit command and update the last event under same lock if we // have in-order queue. { @@ -468,7 +468,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, auto isNoEventsMode = trySwitchingToNoEventsMode(); if (!CallerNeedsEvent && isNoEventsMode) { NestedCallsTracker tracker; - MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), + MemOpFunc(std::forward(MemOpArgs)..., + getUrEvents(ExpandedDepEvents), /*PiEvent*/ nullptr); return createDiscardedEvent(); @@ -480,7 +481,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, NestedCallsTracker tracker; ur_event_handle_t UREvent = nullptr; EventImpl->setSubmissionTime(); - MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent); + MemOpFunc(std::forward(MemOpArgs)..., + getUrEvents(ExpandedDepEvents), &UREvent); EventImpl->setHandle(UREvent); EventImpl->setEnqueued(); // connect returned event with dependent events diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 4cf360140d680..9098c5df5bec6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -955,7 +955,8 @@ class queue_impl { event submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc, - MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs); + MemMngrFuncT MemMngrFunc, + MemMngrArgTs &&...MemOpArgs); // When instrumentation is enabled emits trace event for wait begin and // returns the telemetry event generated for the wait diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 39cf63a3ba352..702f6692825b4 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -184,7 +184,8 @@ addCounterInit(handler &CGH, std::shared_ptr &Queue, EventImpl->setContextImpl(detail::getSyclObjImpl(Queue->get_context())); EventImpl->setStateIncomplete(); ur_event_handle_t UREvent = nullptr; - MemoryManager::fill_usm(Counter.get(), Queue, sizeof(int), {0}, {}, &UREvent); + MemoryManager::fill_usm(Counter.get(), *Queue, sizeof(int), {0}, {}, + &UREvent); EventImpl->setHandle(UREvent); CGH.depends_on(createSyclObjFromImpl(EventImpl)); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 739242128f0a2..b95efb4d2d1ee 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1303,7 +1303,7 @@ ur_result_t ReleaseCommand::enqueueImp() { if (auto Result = callMemOpHelper(MemoryManager::unmap, MAllocaCmd->getSYCLMemObj(), - Dst, Queue, Src, RawEvents, UREvent); + Dst, *Queue, Src, RawEvents, UREvent); Result != UR_RESULT_SUCCESS) return Result; @@ -1388,7 +1388,7 @@ ur_result_t MapMemObject::enqueueImp() { ur_event_handle_t UREvent = nullptr; if (auto Result = callMemOpHelperRet( *MDstPtr, MemoryManager::map, MSrcAllocaCmd->getSYCLMemObj(), - MSrcAllocaCmd->getMemAllocation(), MQueue, MMapMode, MSrcReq.MDims, + MSrcAllocaCmd->getMemAllocation(), *MQueue, MMapMode, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, MSrcReq.MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) @@ -1471,7 +1471,7 @@ ur_result_t UnMapMemObject::enqueueImp() { ur_event_handle_t UREvent = nullptr; if (auto Result = callMemOpHelper(MemoryManager::unmap, MDstAllocaCmd->getSYCLMemObj(), - MDstAllocaCmd->getMemAllocation(), MQueue, *MSrcPtr, + MDstAllocaCmd->getMemAllocation(), *MQueue, *MSrcPtr, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) return Result; @@ -1580,9 +1580,9 @@ ur_result_t MemCpyCommand::enqueueImp() { if (auto Result = callMemOpHelper( MemoryManager::copy, MSrcAllocaCmd->getSYCLMemObj(), - MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, + MSrcAllocaCmd->getMemAllocation(), MSrcQueue.get(), MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, - MSrcReq.MElemSize, MDstAllocaCmd->getMemAllocation(), MQueue, + MSrcReq.MElemSize, MDstAllocaCmd->getMemAllocation(), MQueue.get(), MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) @@ -1741,9 +1741,9 @@ ur_result_t MemCpyCommandHost::enqueueImp() { if (auto Result = callMemOpHelper( MemoryManager::copy, MSrcAllocaCmd->getSYCLMemObj(), - MSrcAllocaCmd->getMemAllocation(), MSrcQueue, MSrcReq.MDims, + MSrcAllocaCmd->getMemAllocation(), MSrcQueue.get(), MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, MSrcReq.MOffset, - MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, + MSrcReq.MElemSize, *MDstPtr, MQueue.get(), MDstReq.MDims, MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, MDstReq.MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) @@ -3180,7 +3180,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (auto Result = callMemOpHelper( MemoryManager::copy, AllocaCmd->getSYCLMemObj(), - AllocaCmd->getMemAllocation(), MQueue, Req->MDims, + AllocaCmd->getMemAllocation(), MQueue.get(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, Copy->getDst(), nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/sycl::id<3>{0, 0, 0}, @@ -3201,7 +3201,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { MemoryManager::copy, AllocaCmd->getSYCLMemObj(), Copy->getSrc(), nullptr, Req->MDims, Req->MAccessRange, Req->MAccessRange, /*SrcOffset*/ sycl::id<3>{0, 0, 0}, Req->MElemSize, - AllocaCmd->getMemAllocation(), MQueue, Req->MDims, + AllocaCmd->getMemAllocation(), MQueue.get(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) @@ -3220,9 +3220,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (auto Result = callMemOpHelper( MemoryManager::copy, AllocaCmdSrc->getSYCLMemObj(), - AllocaCmdSrc->getMemAllocation(), MQueue, ReqSrc->MDims, + AllocaCmdSrc->getMemAllocation(), MQueue.get(), ReqSrc->MDims, ReqSrc->MMemoryRange, ReqSrc->MAccessRange, ReqSrc->MOffset, - ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(), MQueue, + ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(), MQueue.get(), ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange, ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), UREvent); Result != UR_RESULT_SUCCESS) @@ -3238,7 +3238,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (auto Result = callMemOpHelper( MemoryManager::fill, AllocaCmd->getSYCLMemObj(), - AllocaCmd->getMemAllocation(), MQueue, Fill->MPattern.size(), + AllocaCmd->getMemAllocation(), *MQueue, Fill->MPattern.size(), Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, std::move(RawEvents), UREvent); @@ -3293,9 +3293,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } case CGType::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); - if (auto Result = callMemOpHelper(MemoryManager::copy_usm, Copy->getSrc(), - MQueue, Copy->getLength(), Copy->getDst(), - std::move(RawEvents), Event); + if (auto Result = callMemOpHelper( + MemoryManager::copy_usm, Copy->getSrc(), *MQueue, Copy->getLength(), + Copy->getDst(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3305,7 +3305,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::FillUSM: { CGFillUSM *Fill = (CGFillUSM *)MCommandGroup.get(); if (auto Result = callMemOpHelper( - MemoryManager::fill_usm, Fill->getDst(), MQueue, Fill->getLength(), + MemoryManager::fill_usm, Fill->getDst(), *MQueue, Fill->getLength(), Fill->getPattern(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3316,7 +3316,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::PrefetchUSM: { CGPrefetchUSM *Prefetch = (CGPrefetchUSM *)MCommandGroup.get(); if (auto Result = callMemOpHelper( - MemoryManager::prefetch_usm, Prefetch->getDst(), MQueue, + MemoryManager::prefetch_usm, Prefetch->getDst(), *MQueue, Prefetch->getLength(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3327,8 +3327,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::AdviseUSM: { CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); if (auto Result = - callMemOpHelper(MemoryManager::advise_usm, Advise->getDst(), MQueue, - Advise->getLength(), Advise->getAdvice(), + callMemOpHelper(MemoryManager::advise_usm, Advise->getDst(), + *MQueue, Advise->getLength(), Advise->getAdvice(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3340,7 +3340,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { CGCopy2DUSM *Copy = (CGCopy2DUSM *)MCommandGroup.get(); if (auto Result = callMemOpHelper( MemoryManager::copy_2d_usm, Copy->getSrc(), Copy->getSrcPitch(), - MQueue, Copy->getDst(), Copy->getDstPitch(), Copy->getWidth(), + *MQueue, Copy->getDst(), Copy->getDstPitch(), Copy->getWidth(), Copy->getHeight(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3351,7 +3351,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::Fill2DUSM: { CGFill2DUSM *Fill = (CGFill2DUSM *)MCommandGroup.get(); if (auto Result = callMemOpHelper( - MemoryManager::fill_2d_usm, Fill->getDst(), MQueue, + MemoryManager::fill_2d_usm, Fill->getDst(), *MQueue, Fill->getPitch(), Fill->getWidth(), Fill->getHeight(), Fill->getPattern(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) @@ -3363,7 +3363,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { case CGType::Memset2DUSM: { CGMemset2DUSM *Memset = (CGMemset2DUSM *)MCommandGroup.get(); if (auto Result = callMemOpHelper( - MemoryManager::memset_2d_usm, Memset->getDst(), MQueue, + MemoryManager::memset_2d_usm, Memset->getDst(), *MQueue, Memset->getPitch(), Memset->getWidth(), Memset->getHeight(), Memset->getValue(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) @@ -3649,7 +3649,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { CGCopyToDeviceGlobal *Copy = (CGCopyToDeviceGlobal *)MCommandGroup.get(); if (auto Result = callMemOpHelper( MemoryManager::copy_to_device_global, Copy->getDeviceGlobalPtr(), - Copy->isDeviceImageScoped(), MQueue, Copy->getNumBytes(), + Copy->isDeviceImageScoped(), *MQueue, Copy->getNumBytes(), Copy->getOffset(), Copy->getSrc(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3662,7 +3662,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { (CGCopyFromDeviceGlobal *)MCommandGroup.get(); if (auto Result = callMemOpHelper( MemoryManager::copy_from_device_global, Copy->getDeviceGlobalPtr(), - Copy->isDeviceImageScoped(), MQueue, Copy->getNumBytes(), + Copy->isDeviceImageScoped(), *MQueue, Copy->getNumBytes(), Copy->getOffset(), Copy->getDest(), std::move(RawEvents), Event); Result != UR_RESULT_SUCCESS) return Result; @@ -3707,7 +3707,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { CGCopyImage *Copy = (CGCopyImage *)MCommandGroup.get(); if (auto Result = callMemOpHelper( - MemoryManager::copy_image_bindless, MQueue, Copy->getSrc(), + MemoryManager::copy_image_bindless, *MQueue, Copy->getSrc(), Copy->getDst(), Copy->getSrcDesc(), Copy->getDstDesc(), Copy->getSrcFormat(), Copy->getDstFormat(), Copy->getCopyFlags(), Copy->getSrcOffset(), Copy->getDstOffset(), Copy->getCopyExtent(),