Skip to content

Commit fd4300f

Browse files
[NFCI][SYCL] Complete handler::MQueue->handler_impl move
Initially started in #18830 Subsequent PRs before this final one: #18794 #18834 #18748
1 parent 74782fe commit fd4300f

File tree

14 files changed

+137
-156
lines changed

14 files changed

+137
-156
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -424,28 +424,23 @@ template <int Dims> bool range_size_fits_in_size_t(const range<Dims> &r) {
424424
/// \ingroup sycl_api
425425
class __SYCL_EXPORT handler {
426426
private:
427+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
428+
/// Constructs SYCL handler from the pre-constructed stack-allocated
429+
/// `handler_impl` (not enforced, but meaningless to do a heap allocation
430+
/// outside handler instance).
431+
///
432+
/// \param HandlerImpl is a pre-constructed handler_impl.
433+
//
434+
// Can't provide this overload outside preview because `handler` lacks
435+
// required data members.
436+
handler(detail::handler_impl &HandlerImpl);
437+
#else
427438
/// Constructs SYCL handler from queue.
428439
///
429440
/// \param Queue is a SYCL queue.
430441
/// \param CallerNeedsEvent indicates if the event resulting from this handler
431442
/// is needed by the caller.
432-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
433-
handler(const std::shared_ptr<detail::queue_impl> &Queue,
434-
bool CallerNeedsEvent);
435-
#else
436443
handler(std::shared_ptr<detail::queue_impl> Queue, bool CallerNeedsEvent);
437-
#endif
438-
439-
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
440-
/// Constructs SYCL handler from the pre-constructed handler_impl and the
441-
/// associated queue. Inside of Graph implementation, the Queue value is not
442-
/// used, for those cases it can be initialized with an empty shared_ptr.
443-
///
444-
/// \param HandlerImpl is a pre-constructed handler_impl.
445-
/// \param Queue is a SYCL queue.
446-
handler(detail::handler_impl *HandlerImpl,
447-
const std::shared_ptr<detail::queue_impl> &Queue);
448-
#else
449444
/// Constructs SYCL handler from the associated queue and the submission's
450445
/// primary and secondary queue.
451446
///
@@ -456,20 +451,14 @@ class __SYCL_EXPORT handler {
456451
/// is null if no secondary queue is associated with the submission.
457452
/// \param CallerNeedsEvent indicates if the event resulting from this handler
458453
/// is needed by the caller.
459-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
460-
// TODO: This function is not used anymore, remove it in the next
461-
// ABI-breaking window.
462454
handler(std::shared_ptr<detail::queue_impl> Queue,
463455
std::shared_ptr<detail::queue_impl> PrimaryQueue,
464456
std::shared_ptr<detail::queue_impl> SecondaryQueue,
465457
bool CallerNeedsEvent);
466-
#endif
467458
__SYCL_DLL_LOCAL handler(std::shared_ptr<detail::queue_impl> Queue,
468459
detail::queue_impl *SecondaryQueue,
469460
bool CallerNeedsEvent);
470-
#endif
471461

472-
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
473462
/// Constructs SYCL handler from Graph.
474463
///
475464
/// The handler will add the command-group as a node to the graph rather than
@@ -3368,16 +3357,15 @@ class __SYCL_EXPORT handler {
33683357

33693358
private:
33703359
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
3371-
// In some cases we need to construct handler_impl in heap. Sole propose
3372-
// of MImplOwner is to destroy handler_impl in destructor of handler.
3373-
// Can't use unique_ptr because declaration of handler_impl is not available
3374-
// in this header.
3375-
std::shared_ptr<detail::handler_impl> MImplOwner;
3360+
// TODO: Maybe make it a reference when non-preview branch is removed.
3361+
// On the other hand, see `HandlerAccess:postProcess` to how `swap_impl` might
3362+
// be useful in future, pointer here would make that possible/easier.
33763363
detail::handler_impl *impl;
3377-
const std::shared_ptr<detail::queue_impl> &MQueue;
33783364
#else
33793365
std::shared_ptr<detail::handler_impl> impl;
3380-
std::shared_ptr<detail::queue_impl> MQueue;
3366+
3367+
// Use impl->get_queue*() instead:
3368+
std::shared_ptr<detail::queue_impl> MQueueDoNotUse;
33813369
#endif
33823370
std::vector<detail::LocalAccessorImplPtr> MLocalAccStorage;
33833371
std::vector<std::shared_ptr<detail::stream_impl>> MStreamStorage;

sycl/source/detail/async_alloc.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ getUrEvents(const std::vector<std::shared_ptr<detail::event_impl>> &DepEvents) {
3333
}
3434

3535
std::vector<std::shared_ptr<detail::node_impl>> getDepGraphNodes(
36-
sycl::handler &Handler, const std::shared_ptr<detail::queue_impl> &Queue,
36+
sycl::handler &Handler, detail::queue_impl *Queue,
3737
const std::shared_ptr<detail::graph_impl> &Graph,
3838
const std::vector<std::shared_ptr<detail::event_impl>> &DepEvents) {
3939
auto HandlerImpl = detail::getSyclObjImpl(Handler);
@@ -46,7 +46,7 @@ std::vector<std::shared_ptr<detail::node_impl>> getDepGraphNodes(
4646
// If this is being recorded from an in-order queue we need to get the last
4747
// in-order node if any, since this will later become a dependency of the
4848
// node being processed here.
49-
if (const auto &LastInOrderNode = Graph->getLastInorderNode(Queue.get());
49+
if (const auto &LastInOrderNode = Graph->getLastInorderNode(Queue);
5050
LastInOrderNode) {
5151
DepNodes.push_back(LastInOrderNode);
5252
}
@@ -78,10 +78,11 @@ void *async_malloc(sycl::handler &h, sycl::usm::alloc kind, size_t size) {
7878
ur_event_handle_t Event = nullptr;
7979
// If a graph is present do the allocation from the graph memory pool instead.
8080
if (auto Graph = h.getCommandGraph(); Graph) {
81-
auto DepNodes = getDepGraphNodes(h, h.MQueue, Graph, DepEvents);
81+
auto DepNodes =
82+
getDepGraphNodes(h, h.impl->get_queue_or_null(), Graph, DepEvents);
8283
alloc = Graph->getMemPool().malloc(size, kind, DepNodes);
8384
} else {
84-
auto &Q = h.MQueue->getHandleRef();
85+
ur_queue_handle_t Q = h.impl->get_queue().getHandleRef();
8586
Adapter->call<sycl::errc::runtime,
8687
sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
8788
Q, (ur_usm_pool_handle_t)0, size, nullptr, UREvents.size(),
@@ -128,13 +129,14 @@ __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size,
128129
ur_event_handle_t Event = nullptr;
129130
// If a graph is present do the allocation from the graph memory pool instead.
130131
if (auto Graph = h.getCommandGraph(); Graph) {
131-
auto DepNodes = getDepGraphNodes(h, h.MQueue, Graph, DepEvents);
132+
auto DepNodes =
133+
getDepGraphNodes(h, h.impl->get_queue_or_null(), Graph, DepEvents);
132134

133135
// Memory pool is passed as the graph may use some properties of it.
134136
alloc = Graph->getMemPool().malloc(size, pool.get_alloc_kind(), DepNodes,
135137
sycl::detail::getSyclObjImpl(pool));
136138
} else {
137-
auto &Q = h.MQueue->getHandleRef();
139+
ur_queue_handle_t Q = h.impl->get_queue().getHandleRef();
138140
Adapter->call<sycl::errc::runtime,
139141
sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
140142
Q, memPoolImpl.get()->get_handle(), size, nullptr, UREvents.size(),

sycl/source/detail/context_impl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,14 @@ void context_impl::addDeviceGlobalInitializer(
338338
}
339339
}
340340

341-
std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
342-
ur_program_handle_t NativePrg,
343-
const std::shared_ptr<queue_impl> &QueueImpl) {
341+
std::vector<ur_event_handle_t>
342+
context_impl::initializeDeviceGlobals(ur_program_handle_t NativePrg,
343+
queue_impl &QueueImpl) {
344344
if (!MDeviceGlobalNotInitializedCnt.load(std::memory_order_acquire))
345345
return {};
346346

347347
const AdapterPtr &Adapter = getAdapter();
348-
device_impl &DeviceImpl = QueueImpl->getDeviceImpl();
348+
device_impl &DeviceImpl = QueueImpl.getDeviceImpl();
349349
std::lock_guard<std::mutex> NativeProgramLock(MDeviceGlobalInitializersMutex);
350350
auto ImgIt = MDeviceGlobalInitializers.find(
351351
std::make_pair(NativePrg, DeviceImpl.getHandleRef()));
@@ -417,7 +417,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
417417
for (DeviceGlobalMapEntry *DeviceGlobalEntry : DeviceGlobalEntries) {
418418
// Get or allocate the USM memory associated with the device global.
419419
DeviceGlobalUSMMem &DeviceGlobalUSM =
420-
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(*QueueImpl);
420+
DeviceGlobalEntry->getOrAllocateDeviceGlobalUSM(QueueImpl);
421421

422422
// If the device global still has a initialization event it should be
423423
// added to the initialization events list. Since initialization events
@@ -432,7 +432,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
432432
ur_event_handle_t InitEvent;
433433
void *const &USMPtr = DeviceGlobalUSM.getPtr();
434434
Adapter->call<UrApiKind::urEnqueueDeviceGlobalVariableWrite>(
435-
QueueImpl->getHandleRef(), NativePrg,
435+
QueueImpl.getHandleRef(), NativePrg,
436436
DeviceGlobalEntry->MUniqueId.c_str(), false, sizeof(void *), 0,
437437
&USMPtr, 0, nullptr, &InitEvent);
438438

sycl/source/detail/context_impl.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,7 @@ class context_impl : public std::enable_shared_from_this<context_impl> {
223223

224224
/// Initializes device globals for a program on the associated queue.
225225
std::vector<ur_event_handle_t>
226-
initializeDeviceGlobals(ur_program_handle_t NativePrg,
227-
const std::shared_ptr<queue_impl> &QueueImpl);
226+
initializeDeviceGlobals(ur_program_handle_t NativePrg, queue_impl &QueueImpl);
228227

229228
void memcpyToHostOnlyDeviceGlobal(device_impl &DeviceImpl,
230229
const void *DeviceGlobalPtr,

sycl/source/detail/event_impl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ event_impl::event_impl(const QueueImplPtr &Queue)
176176
MState.store(HES_Complete);
177177
}
178178

179-
void event_impl::setQueue(const QueueImplPtr &Queue) {
180-
MQueue = Queue;
181-
MIsProfilingEnabled = Queue->MIsProfilingEnabled;
179+
void event_impl::setQueue(queue_impl &Queue) {
180+
MQueue = Queue.shared_from_this();
181+
MIsProfilingEnabled = Queue.MIsProfilingEnabled;
182182

183183
// TODO After setting the queue, the event is no longer default
184184
// constructed. Consider a design change which would allow

sycl/source/detail/event_impl.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class event_impl {
7474
/// as it was constructed with the queue based constructor.
7575
///
7676
/// \param Queue is a queue to be associated with the event
77-
void setQueue(const QueueImplPtr &Queue);
77+
void setQueue(queue_impl &Queue);
7878

7979
/// Waits for the event.
8080
///
@@ -234,14 +234,14 @@ class event_impl {
234234
/// Sets worker queue for command.
235235
///
236236
/// @return
237-
void setWorkerQueue(const QueueImplPtr &WorkerQueue) {
237+
void setWorkerQueue(std::weak_ptr<queue_impl> WorkerQueue) {
238238
MWorkerQueue = WorkerQueue;
239239
};
240240

241241
/// Sets original queue used for submission.
242242
///
243243
/// @return
244-
void setSubmittedQueue(const QueueImplPtr &SubmittedQueue) {
244+
void setSubmittedQueue(std::weak_ptr<queue_impl> SubmittedQueue) {
245245
MSubmittedQueue = SubmittedQueue;
246246
};
247247

sycl/source/detail/graph_impl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ graph_impl::add(std::function<void(handler &)> CGF,
505505
(void)Args;
506506
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
507507
detail::handler_impl HandlerImpl{*this};
508-
sycl::handler Handler{&HandlerImpl, std::shared_ptr<detail::queue_impl>{}};
508+
sycl::handler Handler{HandlerImpl};
509509
#else
510510
sycl::handler Handler{shared_from_this()};
511511
#endif
@@ -2310,7 +2310,7 @@ void dynamic_command_group_impl::finalizeCGFList(
23102310
// as a single command-group with multiple commands inside.
23112311
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
23122312
detail::handler_impl HandlerImpl{*MGraph};
2313-
sycl::handler Handler{&HandlerImpl, std::shared_ptr<detail::queue_impl>{}};
2313+
sycl::handler Handler{HandlerImpl};
23142314
#else
23152315
sycl::handler Handler{MGraph};
23162316
#endif

sycl/source/detail/queue_impl.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -311,15 +311,11 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
311311
const v1::SubmissionInfo &SubmitInfo) {
312312
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
313313
detail::handler_impl HandlerImplVal(*this, SecondaryQueue, CallerNeedsEvent);
314-
detail::handler_impl *HandlerImpl = &HandlerImplVal;
315-
// Inlining `Self` results in a crash when SYCL RT is built using MSVC with
316-
// optimizations enabled. No crash if built using OneAPI.
317-
auto Self = shared_from_this();
318-
handler Handler(HandlerImpl, Self);
314+
handler Handler(HandlerImplVal);
319315
#else
320316
handler Handler(shared_from_this(), SecondaryQueue, CallerNeedsEvent);
321-
auto &HandlerImpl = detail::getSyclObjImpl(Handler);
322317
#endif
318+
auto &HandlerImpl = detail::getSyclObjImpl(Handler);
323319

324320
#ifdef XPTI_ENABLE_INSTRUMENTATION
325321
if (xptiTraceEnabled()) {

sycl/source/detail/queue_impl.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -687,10 +687,12 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
687687
protected:
688688
template <typename HandlerType = handler>
689689
EventImplPtr insertHelperBarrier(const HandlerType &Handler) {
690-
auto ResEvent = std::make_shared<detail::event_impl>(Handler.MQueue);
690+
auto &Queue = Handler.impl->get_queue();
691+
auto ResEvent =
692+
std::make_shared<detail::event_impl>(Queue.shared_from_this());
691693
ur_event_handle_t UREvent = nullptr;
692694
getAdapter()->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
693-
Handler.MQueue->getHandleRef(), 0, nullptr, &UREvent);
695+
Queue.getHandleRef(), 0, nullptr, &UREvent);
694696
ResEvent->setHandle(UREvent);
695697
return ResEvent;
696698
}

0 commit comments

Comments
 (0)