diff --git a/sycl/gdb/libsycl.so-gdb.py b/sycl/gdb/libsycl.so-gdb.py index 2a9d9b0e78550..b0c38d375da39 100644 --- a/sycl/gdb/libsycl.so-gdb.py +++ b/sycl/gdb/libsycl.so-gdb.py @@ -432,7 +432,7 @@ class SYCLQueue(SYCLValue): """Provides information about a sycl::queue from a gdb.Value.""" DEVICE_TYPE_NAME = "sycl::_V1::device" - IMPL_OFFSET_TO_DEVICE = 0x28 + IMPL_OFFSET_TO_DEVICE = 0x38 def __init__(self, gdb_value): super().__init__(gdb_value) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 9569cd2a8028d..9ce7acec912c5 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -158,7 +158,7 @@ __SYCL_EXPORT queue make_queue(ur_native_handle_t NativeHandle, &UrQueue); // Construct the SYCL queue from UR queue. return detail::createSyclObjFromImpl( - std::make_shared(UrQueue, ContextImpl, Handler, PropList)); + queue_impl::create(UrQueue, ContextImpl, Handler, PropList)); } __SYCL_EXPORT event make_event(ur_native_handle_t NativeHandle, diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 771e84fde1b69..1a328507808b8 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -945,7 +945,7 @@ exec_graph_impl::exec_graph_impl(sycl::context Context, const std::shared_ptr &GraphImpl, const property_list &PropList) : MSchedule(), MGraphImpl(GraphImpl), MSyncPoints(), - MQueueImpl(std::make_shared( + MQueueImpl(sycl::detail::queue_impl::create( *sycl::detail::getSyclObjImpl(GraphImpl->getDevice()), sycl::detail::getSyclObjImpl(Context), sycl::async_handler{}, sycl::property_list{})), diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index fb3b711c01855..a8a51b47bbef6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -151,8 +151,7 @@ queue_impl::getExtendDependencyList(const std::vector &DepEvents, return MutableVec; } -event queue_impl::memset(const std::shared_ptr &Self, - void *Ptr, int Value, size_t Count, +event queue_impl::memset(void *Ptr, int Value, size_t Count, const std::vector &DepEvents, bool CallerNeedsEvent) { #if XPTI_ENABLE_INSTRUMENTATION @@ -180,7 +179,7 @@ event queue_impl::memset(const std::shared_ptr &Self, #endif const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( - Self, DepEvents, CallerNeedsEvent, + DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, MemoryManager::fill_usm, Ptr, *this, Count, Pattern); } @@ -198,8 +197,7 @@ void report(const code_location &CodeLoc) { std::cout << '\n'; } -event queue_impl::memcpy(const std::shared_ptr &Self, - void *Dest, const void *Src, size_t Count, +event queue_impl::memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc) { #if XPTI_ENABLE_INSTRUMENTATION @@ -231,28 +229,28 @@ event queue_impl::memcpy(const std::shared_ptr &Self, "NULL pointer argument in memory copy operation."); } return submitMemOpHelper( - Self, DepEvents, CallerNeedsEvent, + DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, MemoryManager::copy_usm, Src, *this, Count, Dest); } -event queue_impl::mem_advise(const std::shared_ptr &Self, - const void *Ptr, size_t Length, +event queue_impl::mem_advise(const void *Ptr, size_t Length, ur_usm_advice_flags_t Advice, const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, CallerNeedsEvent, + DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, MemoryManager::advise_usm, Ptr, *this, Length, Advice); } -event queue_impl::memcpyToDeviceGlobal( - const std::shared_ptr &Self, void *DeviceGlobalPtr, - const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents, bool CallerNeedsEvent) { +event queue_impl::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, + bool IsDeviceImageScope, size_t NumBytes, + size_t Offset, + const std::vector &DepEvents, + bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, CallerNeedsEvent, + DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset); @@ -261,12 +259,14 @@ event queue_impl::memcpyToDeviceGlobal( *this, NumBytes, Offset, Src); } -event queue_impl::memcpyFromDeviceGlobal( - const std::shared_ptr &Self, void *Dest, - const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, const std::vector &DepEvents, bool CallerNeedsEvent) { +event queue_impl::memcpyFromDeviceGlobal(void *Dest, + const void *DeviceGlobalPtr, + bool IsDeviceImageScope, + size_t NumBytes, size_t Offset, + const std::vector &DepEvents, + bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, CallerNeedsEvent, + DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset); @@ -275,8 +275,7 @@ event queue_impl::memcpyFromDeviceGlobal( IsDeviceImageScope, *this, NumBytes, Offset, Dest); } -sycl::detail::optional -queue_impl::getLastEvent(const std::shared_ptr &Self) { +sycl::detail::optional queue_impl::getLastEvent() { // The external event is required to finish last if set, so it is considered // the last event if present. if (std::optional ExternalEvent = MInOrderExternalEvent.read()) @@ -291,7 +290,7 @@ queue_impl::getLastEvent(const std::shared_ptr &Self) { if (LastEvent) return detail::createSyclObjFromImpl(LastEvent); // We insert a marker to represent an event at end. - return detail::createSyclObjFromImpl(insertMarkerEvent(Self)); + return detail::createSyclObjFromImpl(insertMarkerEvent()); } void queue_impl::addEvent(const detail::EventImplPtr &EventImpl) { @@ -307,16 +306,18 @@ void queue_impl::addEvent(const detail::EventImplPtr &EventImpl) { detail::EventImplPtr queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, queue_impl *SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, bool IsTopCodeLoc, const v1::SubmissionInfo &SubmitInfo) { #ifdef __INTEL_PREVIEW_BREAKING_CHANGES detail::handler_impl HandlerImplVal(SecondaryQueue, CallerNeedsEvent); detail::handler_impl *HandlerImpl = &HandlerImplVal; + // Inlining `Self` results in a crash when SYCL RT is built using MSVC with + // optimizations enabled. No crash if built using OneAPI. + auto Self = shared_from_this(); handler Handler(HandlerImpl, Self); #else - handler Handler(Self, SecondaryQueue, CallerNeedsEvent); + handler Handler(shared_from_this(), SecondaryQueue, CallerNeedsEvent); auto &HandlerImpl = detail::getSyclObjImpl(Handler); #endif @@ -398,9 +399,8 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, Stream->generateFlushCommand(ServiceCGH); }; detail::type_erased_cgfo_ty CGF{L}; - detail::EventImplPtr FlushEvent = - submit_impl(CGF, Self, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc, - IsTopCodeLoc, {}); + detail::EventImplPtr FlushEvent = submit_impl( + CGF, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {}); if (EventImpl) EventImpl->attachEventToCompleteWeak(FlushEvent); registerStreamServiceEvent(FlushEvent); @@ -412,19 +412,17 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, #ifndef __INTEL_PREVIEW_BREAKING_CHANGES detail::EventImplPtr queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, - const std::shared_ptr &, + const std::shared_ptr & /*PrimaryQueue*/, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) { - return submit_impl(CGF, Self, SecondaryQueue.get(), CallerNeedsEvent, Loc, + return submit_impl(CGF, SecondaryQueue.get(), CallerNeedsEvent, Loc, IsTopCodeLoc, SubmitInfo); } #endif template -event queue_impl::submitWithHandler(const std::shared_ptr &Self, - const std::vector &DepEvents, +event queue_impl::submitWithHandler(const std::vector &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc) { v1::SubmissionInfo SI{}; @@ -435,17 +433,16 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, detail::type_erased_cgfo_ty CGF{L}; if (!CallerNeedsEvent && supportsDiscardingPiEvents()) { - submit_without_event(CGF, Self, SI, + submit_without_event(CGF, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true); return createDiscardedEvent(); } - return submit_with_event(CGF, Self, SI, + return submit_with_event(CGF, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true); } template -event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, - const std::vector &DepEvents, +event queue_impl::submitMemOpHelper(const std::vector &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemOpFuncT MemOpFunc, @@ -475,7 +472,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, return createDiscardedEvent(); } - event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); + event ResEvent = prepareSYCLEventAssociatedWithQueue(shared_from_this()); const auto &EventImpl = detail::getSyclObjImpl(ResEvent); { NestedCallsTracker tracker; @@ -509,7 +506,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, return ResEvent; } } - return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc); + return submitWithHandler(DepEvents, CallerNeedsEvent, HandlerFunc); } void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 9098c5df5bec6..2bb0b5d23f430 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -33,6 +33,7 @@ #include "detail/graph_impl.hpp" +#include #include #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -73,7 +74,13 @@ struct SubmissionInfoImpl { ext::oneapi::experimental::event_mode_enum::none; }; -class queue_impl { +class queue_impl : public std::enable_shared_from_this { + // `protected` is for unittests only, should really be private! +protected: + struct private_tag { + explicit private_tag() = default; + }; + public: // \return a default context for the platform if it includes the device // passed and default contexts are enabled, a new context otherwise. @@ -97,8 +104,9 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. queue_impl(device_impl &Device, const async_handler &AsyncHandler, - const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; + const property_list &PropList, private_tag tag) + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList, + tag) {}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -110,7 +118,8 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. queue_impl(device_impl &Device, const ContextImplPtr &Context, - const async_handler &AsyncHandler, const property_list &PropList) + const async_handler &AsyncHandler, const property_list &PropList, + private_tag) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), MIsInorder(has_property()), @@ -168,10 +177,8 @@ class queue_impl { trySwitchingToNoEventsMode(); } - sycl::detail::optional - getLastEvent(const std::shared_ptr &Self); + sycl::detail::optional getLastEvent(); -public: /// Constructs a SYCL queue from adapter interoperability handle. /// /// \param UrQueue is a raw UR queue handle. @@ -179,8 +186,8 @@ class queue_impl { /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, - const async_handler &AsyncHandler) - : queue_impl(UrQueue, Context, AsyncHandler, {}) {} + const async_handler &AsyncHandler, private_tag tag) + : queue_impl(UrQueue, Context, AsyncHandler, {}, tag) {} /// Constructs a SYCL queue from adapter interoperability handle. /// @@ -190,7 +197,8 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is the queue properties. queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, - const async_handler &AsyncHandler, const property_list &PropList) + const async_handler &AsyncHandler, const property_list &PropList, + private_tag) : MDevice([&]() -> device_impl & { ur_device_handle_t DeviceUr{}; const AdapterPtr &Adapter = Context->getAdapter(); @@ -234,6 +242,15 @@ class queue_impl { trySwitchingToNoEventsMode(); } + // Single variadic method works because all the ctors are expected to be + // "public" except the `private_tag` part restricting the creation to + // `std::shared_ptr` allocations. + template + static std::shared_ptr create(Ts &&...args) { + return std::make_shared(std::forward(args)..., + private_tag{}); + } + ~queue_impl() { try { #if XPTI_ENABLE_INSTRUMENTATION @@ -318,14 +335,12 @@ class queue_impl { /// for execution on a secondary queue. /// /// \param CGF is a function object containing command group. - /// \param Self is a shared_ptr to this queue. /// \param SecondQueue is a shared_ptr to the secondary queue. /// \param Loc is the code location of the submit call (default argument) /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event submit(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, const std::shared_ptr &SecondQueue, const detail::code_location &Loc, bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess = nullptr) { @@ -334,35 +349,32 @@ class queue_impl { SI.SecondaryQueue() = SecondQueue; if (PostProcess) SI.PostProcessorFunc() = *PostProcess; - return submit_with_event(CGF, Self, SI, Loc, IsTopCodeLoc); + return submit_with_event(CGF, SI, Loc, IsTopCodeLoc); } /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. /// /// \param CGF is a function object containing command group. - /// \param Self is a shared_ptr to this queue. /// \param SubmitInfo is additional optional information for the submission. /// \param Loc is the code location of the submit call (default argument) /// \param StoreAdditionalInfo makes additional info be stored in event_impl /// \return a SYCL event object for the submitted command group. event submit_with_event(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, const v1::SubmissionInfo &SubmitInfo, const detail::code_location &Loc, bool IsTopCodeLoc) { detail::EventImplPtr ResEvent = - submit_impl(CGF, Self, SubmitInfo.SecondaryQueue().get(), + submit_impl(CGF, SubmitInfo.SecondaryQueue().get(), /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, SubmitInfo); return createSyclObjFromImpl(ResEvent); } void submit_without_event(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, const v1::SubmissionInfo &SubmitInfo, const detail::code_location &Loc, bool IsTopCodeLoc) { - submit_impl(CGF, Self, SubmitInfo.SecondaryQueue().get(), + submit_impl(CGF, SubmitInfo.SecondaryQueue().get(), /*CallerNeedsEvent=*/false, Loc, IsTopCodeLoc, SubmitInfo); } @@ -516,7 +528,6 @@ class queue_impl { /// Fills the memory pointed by a USM pointer with the value specified. /// - /// \param Self is a shared_ptr to this queue. /// \param Ptr is a USM pointer to the memory to fill. /// \param Value is a value to be set. Value is cast as an unsigned char. /// \param Count is a number of bytes to fill. @@ -524,13 +535,11 @@ class queue_impl { /// dependencies. /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing fill operation. - event memset(const std::shared_ptr &Self, void *Ptr, int Value, - size_t Count, const std::vector &DepEvents, - bool CallerNeedsEvent); + event memset(void *Ptr, int Value, size_t Count, + const std::vector &DepEvents, bool CallerNeedsEvent); /// Copies data from one memory region to another, both pointed by /// USM pointers. /// - /// \param Self is a shared_ptr to this queue. /// \param Dest is a USM pointer to the destination memory. /// \param Src is a USM pointer to the source memory. /// \param Count is a number of bytes to copy. @@ -538,14 +547,12 @@ class queue_impl { /// dependencies. /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing copy operation. - event memcpy(const std::shared_ptr &Self, void *Dest, - const void *Src, size_t Count, + event memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc); /// Provides additional information to the underlying runtime about how /// different allocations are used. /// - /// \param Self is a shared_ptr to this queue. /// \param Ptr is a USM pointer to the allocation. /// \param Length is a number of bytes in the allocation. /// \param Advice is a device-defined advice for the specified allocation. @@ -553,8 +560,7 @@ class queue_impl { /// dependencies. /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing advise operation. - event mem_advise(const std::shared_ptr &Self, const void *Ptr, - size_t Length, ur_usm_advice_flags_t Advice, + event mem_advise(const void *Ptr, size_t Length, ur_usm_advice_flags_t Advice, const std::vector &DepEvents, bool CallerNeedsEvent); /// Puts exception to the list of asynchronous ecxeptions. @@ -581,13 +587,11 @@ class queue_impl { bool queue_empty() const; - event memcpyToDeviceGlobal(const std::shared_ptr &Self, - void *DeviceGlobalPtr, const void *Src, + event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector &DepEvents, bool CallerNeedsEvent); - event memcpyFromDeviceGlobal(const std::shared_ptr &Self, - void *Dest, const void *DeviceGlobalPtr, + event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector &DepEvents, @@ -658,8 +662,8 @@ class queue_impl { /// Inserts a marker event at the end of the queue. Waiting for this marker /// will wait for the completion of all work in the queue at the time of the /// insertion, but will not act as a barrier unless the queue is in-order. - EventImplPtr insertMarkerEvent(const std::shared_ptr &Self) { - auto ResEvent = std::make_shared(Self); + EventImplPtr insertMarkerEvent() { + auto ResEvent = std::make_shared(shared_from_this()); ur_event_handle_t UREvent = nullptr; getAdapter()->call(getHandleRef(), 0, nullptr, &UREvent); @@ -888,11 +892,10 @@ class queue_impl { /// Performs command group submission to the queue. /// /// \param CGF is a function object containing command group. - /// \param Self is a pointer to this queue. /// \param PrimaryQueue is a pointer to the primary queue. This may be the - /// same as Self. + /// same as this. /// \param SecondaryQueue is a pointer to the secondary queue. This may be the - /// same as Self. + /// same as this. /// \param CallerNeedsEvent is a boolean indicating whether the event is /// required by the user after the call. /// \param Loc is the code location of the submit call (default argument) @@ -900,7 +903,6 @@ class queue_impl { /// \return a SYCL event representing submitted command group. detail::EventImplPtr submit_impl(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, @@ -910,7 +912,6 @@ class queue_impl { /// Performs command group submission to the queue. /// /// \param CGF is a function object containing command group. - /// \param Self is a pointer to this queue. /// \param SecondaryQueue is a pointer to the secondary queue. /// \param CallerNeedsEvent is a boolean indicating whether the event is /// required by the user after the call. @@ -918,7 +919,6 @@ class queue_impl { /// \param SubmitInfo is additional optional information for the submission. /// \return a SYCL event representing submitted command group. detail::EventImplPtr submit_impl(const detail::type_erased_cgfo_ty &CGF, - const std::shared_ptr &Self, queue_impl *SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, @@ -926,19 +926,16 @@ class queue_impl { const v1::SubmissionInfo &SubmitInfo); /// Helper function for submitting a memory operation with a handler. - /// \param Self is a shared_ptr to this queue. /// \param DepEvents is a vector of dependencies of the operation. /// \param HandlerFunc is a function that submits the operation with a /// handler. template - event submitWithHandler(const std::shared_ptr &Self, - const std::vector &DepEvents, + event submitWithHandler(const std::vector &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc); /// Performs submission of a memory operation directly if scheduler can be /// bypassed, or with a handler otherwise. /// - /// \param Self is a shared_ptr to this queue. /// \param DepEvents is a vector of dependencies of the operation. /// \param CallerNeedsEvent specifies if the caller needs an event from this /// memory operation. @@ -952,8 +949,7 @@ class queue_impl { /// \return an event representing the submitted operation. template - event submitMemOpHelper(const std::shared_ptr &Self, - const std::vector &DepEvents, + event submitMemOpHelper(const std::vector &DepEvents, bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, MemMngrArgTs &&...MemOpArgs); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d4a2a3cef1251..a0e3e25e07d1c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -221,8 +221,8 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, // Since all the Scheduler commands require queue but we have only context // here, we need to create a dummy queue bound to the context and one of the // devices from the context. - QueueImplPtr InteropQueuePtr{new detail::queue_impl{ - Dev, InteropCtxPtr, /*AsyncHandler=*/{}, /*PropertyList=*/{}}}; + std::shared_ptr InteropQueuePtr = queue_impl::create( + Dev, InteropCtxPtr, async_handler{}, property_list{}); MemObject->MRecord.reset( new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency}); diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp index 73c4ebe249467..b45d1b4f9f3cd 100644 --- a/sycl/source/enqueue_functions.cpp +++ b/sycl/source/enqueue_functions.cpp @@ -17,7 +17,7 @@ __SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, const sycl::detail::code_location &CodeLoc) { sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - QueueImplPtr->memcpy(QueueImplPtr, Dest, Src, NumBytes, {}, + QueueImplPtr->memcpy(Dest, Src, NumBytes, {}, /*CallerNeedsEvent=*/false, TlsCodeLocCapture.query()); } @@ -25,7 +25,7 @@ __SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, const sycl::detail::code_location &CodeLoc) { sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - QueueImplPtr->memset(QueueImplPtr, Ptr, Value, NumBytes, {}, + QueueImplPtr->memset(Ptr, Value, NumBytes, {}, /*CallerNeedsEvent=*/false); } @@ -33,8 +33,7 @@ __SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, const sycl::detail::code_location &CodeLoc) { sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); - QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, - ur_usm_advice_flags_t(Advice), {}, + QueueImplPtr->mem_advise(Ptr, NumBytes, ur_usm_advice_flags_t(Advice), {}, /*CallerNeedsEvent=*/false); } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 174645fafd20c..8b39334f5b432 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -64,22 +64,22 @@ queue::queue(const context &SyclContext, const device_selector &DeviceSelector, const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp); - impl = std::make_shared( - *detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), - AsyncHandler, PropList); + impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), + detail::getSyclObjImpl(SyclContext), + AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { - impl = std::make_shared( - *detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), - AsyncHandler, PropList); + impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), + detail::getSyclObjImpl(SyclContext), + AsyncHandler, PropList); } queue::queue(const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { - impl = std::make_shared( - *detail::getSyclObjImpl(SyclDevice), AsyncHandler, PropList); + impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), + AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device_selector &deviceSelector, @@ -97,7 +97,7 @@ queue::queue(const context &SyclContext, const device &SyclDevice, queue::queue(cl_command_queue clQueue, const context &SyclContext, const async_handler &AsyncHandler) { const property_list PropList{}; - impl = std::make_shared( + impl = detail::queue_impl::create( // TODO(pi2ur): Don't cast straight from cl_command_queue reinterpret_cast(clQueue), detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); @@ -134,13 +134,13 @@ void queue::throw_asynchronous() { impl->throw_asynchronous(); } event queue::memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true); + return impl->memset(Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {DepEvent}, + return impl->memset(Ptr, Value, Count, {DepEvent}, /*CallerNeedsEvent=*/true); } @@ -148,21 +148,21 @@ event queue::memset(void *Ptr, int Value, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, DepEvents, + return impl->memset(Ptr, Value, Count, DepEvents, /*CallerNeedsEvent=*/true); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, + return impl->memcpy(Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, + return impl->memcpy(Dest, Src, Count, {DepEvent}, /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } @@ -170,21 +170,21 @@ event queue::memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, DepEvents, + return impl->memcpy(Dest, Src, Count, DepEvents, /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), {}, + return impl->mem_advise(Ptr, Length, ur_usm_advice_flags_t(Advice), {}, /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), + return impl->mem_advise(Ptr, Length, ur_usm_advice_flags_t(Advice), {DepEvent}, /*CallerNeedsEvent=*/true); } @@ -193,8 +193,7 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, ur_usm_advice_flags_t(Advice), - DepEvents, + return impl->mem_advise(Ptr, Length, ur_usm_advice_flags_t(Advice), DepEvents, /*CallerNeedsEvent=*/true); } @@ -212,12 +211,12 @@ event queue::submit_impl(std::function CGH, event queue::submit_impl(std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true); + return impl->submit(CGH, SecondQueue.impl, CodeLoc, true); } event queue::submit_impl(std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, IsTopCodeLoc); + return impl->submit(CGH, SecondQueue.impl, CodeLoc, IsTopCodeLoc); } void queue::submit_without_event_impl(std::function CGH, @@ -249,13 +248,13 @@ event queue::submit_impl_and_postprocess( std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc, const detail::SubmitPostProcessF &PostProcess) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true, &PostProcess); + return impl->submit(CGH, SecondQueue.impl, CodeLoc, true, &PostProcess); } event queue::submit_impl_and_postprocess( std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc, const detail::SubmitPostProcessF &PostProcess, bool IsTopCodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, IsTopCodeLoc, + return impl->submit(CGH, SecondQueue.impl, CodeLoc, IsTopCodeLoc, &PostProcess); } @@ -263,14 +262,14 @@ event queue::submit_with_event_impl(std::function CGH, const detail::SubmissionInfo &SubmitInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - return impl->submit_with_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); + return impl->submit_with_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc); } void queue::submit_without_event_impl(std::function CGH, const detail::SubmissionInfo &SubmitInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { - impl->submit_without_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); + impl->submit_without_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc); } event queue::submit_with_event_impl(const detail::type_erased_cgfo_ty &CGH, @@ -278,7 +277,7 @@ event queue::submit_with_event_impl(const detail::type_erased_cgfo_ty &CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::v1::SubmissionInfo SI{SubmitInfo}; - return impl->submit_with_event(CGH, impl, SI, CodeLoc, IsTopCodeLoc); + return impl->submit_with_event(CGH, SI, CodeLoc, IsTopCodeLoc); } void queue::submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH, @@ -286,7 +285,7 @@ void queue::submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH, const detail::code_location &CodeLoc, bool IsTopCodeLoc) { detail::v1::SubmissionInfo SI{SubmitInfo}; - impl->submit_without_event(CGH, impl, SI, CodeLoc, IsTopCodeLoc); + impl->submit_without_event(CGH, SI, CodeLoc, IsTopCodeLoc); } event queue::submit_with_event_impl( @@ -310,14 +309,14 @@ event queue::submit_with_event_impl( const detail::type_erased_cgfo_ty &CGH, const detail::v1::SubmissionInfo &SubmitInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) const { - return impl->submit_with_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); + return impl->submit_with_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc); } void queue::submit_without_event_impl( const detail::type_erased_cgfo_ty &CGH, const detail::v1::SubmissionInfo &SubmitInfo, const detail::code_location &CodeLoc, bool IsTopCodeLoc) const { - impl->submit_without_event(CGH, impl, SubmitInfo, CodeLoc, IsTopCodeLoc); + impl->submit_without_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc); } void queue::wait_proxy(const detail::code_location &CodeLoc) { @@ -337,7 +336,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { assert(!QueueImpl->hasCommandGraph() && "Should not be called in on graph recording."); - sycl::detail::optional LastEvent = QueueImpl->getLastEvent(QueueImpl); + sycl::detail::optional LastEvent = QueueImpl->getLastEvent(); if (LastEvent) return *LastEvent; @@ -434,18 +433,18 @@ event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector &DepEvents) { - return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src, - IsDeviceImageScope, NumBytes, Offset, - DepEvents, /*CallerNeedsEvent=*/true); + return impl->memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope, + NumBytes, Offset, DepEvents, + /*CallerNeedsEvent=*/true); } event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, const std::vector &DepEvents) { - return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr, - IsDeviceImageScope, NumBytes, Offset, - DepEvents, /*CallerNeedsEvent=*/true); + return impl->memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope, + NumBytes, Offset, DepEvents, + /*CallerNeedsEvent=*/true); } bool queue::device_has(aspect Aspect) const { @@ -462,7 +461,7 @@ sycl::detail::optional queue::ext_oneapi_get_last_event_impl() const { make_error_code(errc::invalid), "ext_oneapi_get_last_event() can only be called on in-order queues."); - return impl->getLastEvent(impl); + return impl->getLastEvent(); } void queue::ext_oneapi_set_external_event(const event &external_event) { diff --git a/sycl/test/gdb/printers.cpp b/sycl/test/gdb/printers.cpp index 6f03c7059e307..c6740f5ff19d5 100644 --- a/sycl/test/gdb/printers.cpp +++ b/sycl/test/gdb/printers.cpp @@ -72,7 +72,7 @@ sycl::range<1> r(3); // DEVICE: 16 | class sycl::range<> MemRange // CHECK: 0 | class sycl::detail::queue_impl -// CHECK: 40 | device_impl & MDevice +// CHECK: 56 | device_impl & MDevice // CHECK: 0 | class sycl::accessor // HOST: 0 | {{.*}} sycl::detail::AccessorImplHost{{.*}} impl diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index fda6e32ba281d..8c68af2c53b79 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -28,7 +28,8 @@ class TestQueueImpl : public sycl::detail::queue_impl { public: TestQueueImpl(ContextImplPtr SyclContext, sycl::detail::device_impl &Dev) : sycl::detail::queue_impl(Dev, SyclContext, - SyclContext->get_async_handler(), {}) {} + SyclContext->get_async_handler(), {}, + sycl::detail::queue_impl::private_tag{}) {} using sycl::detail::queue_impl::MDefaultGraphDeps; using sycl::detail::queue_impl::MExtGraphDeps; using sycl::detail::queue_impl::MMutex; @@ -61,25 +62,25 @@ class BarrierHandlingWithHostTask : public ::testing::Test { auto L = [&](handler &CGH) { CGH.host_task(BlockHostTask ? CustomHostLambda : [] {}); }; - return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, - QueueDevImpl, nullptr, {}, true); + return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, + {}, true); } else if (Type == TestCGType::KERNEL_TASK) { auto L = [&](handler &CGH) { CGH.single_task>([] {}); }; - return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, - QueueDevImpl, nullptr, {}, true); + return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, + {}, true); } else // (Type == TestCGType::BARRIER) { auto L = [&](handler &CGH) { CGH.ext_oneapi_barrier(); }; - return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, - QueueDevImpl, nullptr, {}, true); + return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, + {}, true); } } sycl::event InsertBarrierWithWaitList(const std::vector &WaitList) { auto L = [&](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }; - return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, - QueueDevImpl, nullptr, {}, true); + return QueueDevImpl->submit(sycl::detail::type_erased_cgfo_ty{L}, nullptr, + {}, true); } void BuildAndCheckInnerQueueState(std::vector &Events) { diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 02cce67098623..c36f3006406b2 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -25,7 +25,8 @@ class MockQueueImpl : public sycl::detail::queue_impl { MockQueueImpl(sycl::detail::device_impl &Device, const sycl::async_handler &AsyncHandler, const sycl::property_list &PropList) - : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} + : sycl::detail::queue_impl(Device, AsyncHandler, PropList, + sycl::detail::queue_impl::private_tag{}) {} using sycl::detail::queue_impl::finalizeHandlerInOrderHostTaskUnlocked; };