@@ -127,11 +127,14 @@ static unsigned long long getQueueID(const std::shared_ptr<queue_impl> &Queue) {
127
127
}
128
128
#endif
129
129
130
- static context_impl *getContext (const QueueImplPtr & Queue) {
130
+ static context_impl *getContext (queue_impl * Queue) {
131
131
if (Queue)
132
132
return &Queue->getContextImpl ();
133
133
return nullptr ;
134
134
}
135
+ static context_impl *getContext (const std::shared_ptr<queue_impl> &Queue) {
136
+ return getContext (Queue.get ());
137
+ }
135
138
136
139
#ifdef __SYCL_ENABLE_GNU_DEMANGLING
137
140
struct DemangleHandle {
@@ -510,7 +513,7 @@ void Command::waitForPreparedHostEvents() const {
510
513
HostEvent->waitInternal ();
511
514
}
512
515
513
- void Command::waitForEvents (QueueImplPtr Queue,
516
+ void Command::waitForEvents (queue_impl * Queue,
514
517
std::vector<EventImplPtr> &EventImpls,
515
518
ur_event_handle_t &Event) {
516
519
#ifndef NDEBUG
@@ -566,12 +569,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
566
569
// / references to event_impl class members because Command
567
570
// / should not outlive the event connected to it.
568
571
Command::Command (
569
- CommandType Type, QueueImplPtr Queue,
572
+ CommandType Type, queue_impl * Queue,
570
573
ur_exp_command_buffer_handle_t CommandBuffer,
571
574
const std::vector<ur_exp_command_buffer_sync_point_t > &SyncPoints)
572
- : MQueue(std::move( Queue) ),
573
- MEvent(MQueue ? detail::event_impl::create_device_event(*MQueue )
574
- : detail::event_impl::create_incomplete_host_event()),
575
+ : MQueue(Queue ? Queue-> shared_from_this () : nullptr ),
576
+ MEvent(Queue ? detail::event_impl::create_device_event(*Queue )
577
+ : detail::event_impl::create_incomplete_host_event()),
575
578
MPreparedDepsEvents(MEvent->getPreparedDepsEvents ()),
576
579
MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents ()), MType(Type),
577
580
MCommandBuffer(CommandBuffer), MSyncPointDeps(SyncPoints) {
@@ -1034,7 +1037,7 @@ void Command::copySubmissionCodeLocation() {
1034
1037
#endif
1035
1038
}
1036
1039
1037
- AllocaCommandBase::AllocaCommandBase (CommandType Type, QueueImplPtr Queue,
1040
+ AllocaCommandBase::AllocaCommandBase (CommandType Type, queue_impl * Queue,
1038
1041
Requirement Req,
1039
1042
AllocaCommandBase *LinkedAllocaCmd,
1040
1043
bool IsConst)
@@ -1077,10 +1080,10 @@ bool AllocaCommandBase::supportsPostEnqueueCleanup() const { return false; }
1077
1080
1078
1081
bool AllocaCommandBase::readyForCleanup () const { return false ; }
1079
1082
1080
- AllocaCommand::AllocaCommand (QueueImplPtr Queue, Requirement Req,
1083
+ AllocaCommand::AllocaCommand (queue_impl * Queue, Requirement Req,
1081
1084
bool InitFromUserData,
1082
1085
AllocaCommandBase *LinkedAllocaCmd, bool IsConst)
1083
- : AllocaCommandBase(CommandType::ALLOCA, std::move( Queue) , std::move(Req),
1086
+ : AllocaCommandBase(CommandType::ALLOCA, Queue, std::move(Req),
1084
1087
LinkedAllocaCmd, IsConst),
1085
1088
MInitFromUserData(InitFromUserData) {
1086
1089
// Node event must be created before the dependent edge is added to this
@@ -1115,7 +1118,7 @@ ur_result_t AllocaCommand::enqueueImp() {
1115
1118
1116
1119
if (!MQueue) {
1117
1120
// Do not need to make allocation if we have a linked device allocation
1118
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1121
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1119
1122
MEvent->setHandle (UREvent);
1120
1123
1121
1124
return UR_RESULT_SUCCESS;
@@ -1155,12 +1158,11 @@ void AllocaCommand::printDot(std::ostream &Stream) const {
1155
1158
}
1156
1159
}
1157
1160
1158
- AllocaSubBufCommand::AllocaSubBufCommand (QueueImplPtr Queue, Requirement Req,
1161
+ AllocaSubBufCommand::AllocaSubBufCommand (queue_impl * Queue, Requirement Req,
1159
1162
AllocaCommandBase *ParentAlloca,
1160
1163
std::vector<Command *> &ToEnqueue,
1161
1164
std::vector<Command *> &ToCleanUp)
1162
- : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue),
1163
- std::move(Req),
1165
+ : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, Queue, std::move(Req),
1164
1166
/* LinkedAllocaCmd*/ nullptr, /* IsConst*/ false),
1165
1167
MParentAlloca(ParentAlloca) {
1166
1168
// Node event must be created before the dependent edge
@@ -1241,8 +1243,8 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const {
1241
1243
}
1242
1244
}
1243
1245
1244
- ReleaseCommand::ReleaseCommand (QueueImplPtr Queue, AllocaCommandBase *AllocaCmd)
1245
- : Command(CommandType::RELEASE, std::move( Queue) ), MAllocaCmd(AllocaCmd) {
1246
+ ReleaseCommand::ReleaseCommand (queue_impl * Queue, AllocaCommandBase *AllocaCmd)
1247
+ : Command(CommandType::RELEASE, Queue), MAllocaCmd(AllocaCmd) {
1246
1248
emitInstrumentationDataProxy ();
1247
1249
}
1248
1250
@@ -1295,9 +1297,9 @@ ur_result_t ReleaseCommand::enqueueImp() {
1295
1297
}
1296
1298
1297
1299
if (NeedUnmap) {
1298
- const QueueImplPtr & Queue = CurAllocaIsHost
1299
- ? MAllocaCmd->MLinkedAllocaCmd ->getQueue ()
1300
- : MAllocaCmd->getQueue ();
1300
+ queue_impl * Queue = CurAllocaIsHost
1301
+ ? MAllocaCmd->MLinkedAllocaCmd ->getQueue ()
1302
+ : MAllocaCmd->getQueue ();
1301
1303
1302
1304
assert (Queue);
1303
1305
@@ -1328,7 +1330,7 @@ ur_result_t ReleaseCommand::enqueueImp() {
1328
1330
}
1329
1331
ur_event_handle_t UREvent = nullptr ;
1330
1332
if (SkipRelease)
1331
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1333
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1332
1334
else {
1333
1335
if (auto Result = callMemOpHelper (
1334
1336
MemoryManager::release, getContext (MQueue),
@@ -1366,11 +1368,10 @@ bool ReleaseCommand::supportsPostEnqueueCleanup() const { return false; }
1366
1368
bool ReleaseCommand::readyForCleanup () const { return false ; }
1367
1369
1368
1370
MapMemObject::MapMemObject (AllocaCommandBase *SrcAllocaCmd, Requirement Req,
1369
- void **DstPtr, QueueImplPtr Queue,
1371
+ void **DstPtr, queue_impl * Queue,
1370
1372
access::mode MapMode)
1371
- : Command(CommandType::MAP_MEM_OBJ, std::move(Queue)),
1372
- MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr),
1373
- MMapMode(MapMode) {
1373
+ : Command(CommandType::MAP_MEM_OBJ, Queue), MSrcAllocaCmd(SrcAllocaCmd),
1374
+ MSrcReq(std::move(Req)), MDstPtr(DstPtr), MMapMode(MapMode) {
1374
1375
emitInstrumentationDataProxy ();
1375
1376
}
1376
1377
@@ -1430,9 +1431,9 @@ void MapMemObject::printDot(std::ostream &Stream) const {
1430
1431
}
1431
1432
1432
1433
UnMapMemObject::UnMapMemObject (AllocaCommandBase *DstAllocaCmd, Requirement Req,
1433
- void **SrcPtr, QueueImplPtr Queue)
1434
- : Command(CommandType::UNMAP_MEM_OBJ, std::move( Queue)),
1435
- MDstAllocaCmd(DstAllocaCmd), MDstReq(std::move(Req)), MSrcPtr(SrcPtr) {
1434
+ void **SrcPtr, queue_impl * Queue)
1435
+ : Command(CommandType::UNMAP_MEM_OBJ, Queue), MDstAllocaCmd(DstAllocaCmd ),
1436
+ MDstReq(std::move(Req)), MSrcPtr(SrcPtr) {
1436
1437
emitInstrumentationDataProxy ();
1437
1438
}
1438
1439
@@ -1516,11 +1517,11 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
1516
1517
AllocaCommandBase *SrcAllocaCmd,
1517
1518
Requirement DstReq,
1518
1519
AllocaCommandBase *DstAllocaCmd,
1519
- QueueImplPtr SrcQueue, QueueImplPtr DstQueue)
1520
- : Command(CommandType::COPY_MEMORY, std::move( DstQueue) ),
1521
- MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq) ),
1522
- MSrcAllocaCmd(SrcAllocaCmd), MDstReq( std::move(DstReq) ),
1523
- MDstAllocaCmd(DstAllocaCmd) {
1520
+ queue_impl * SrcQueue, queue_impl * DstQueue)
1521
+ : Command(CommandType::COPY_MEMORY, DstQueue),
1522
+ MSrcQueue(SrcQueue ? SrcQueue-> shared_from_this () : nullptr ),
1523
+ MSrcReq( std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd ),
1524
+ MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) {
1524
1525
if (MSrcQueue) {
1525
1526
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1526
1527
}
@@ -1652,7 +1653,7 @@ ur_result_t UpdateHostRequirementCommand::enqueueImp() {
1652
1653
waitForPreparedHostEvents ();
1653
1654
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
1654
1655
ur_event_handle_t UREvent = nullptr ;
1655
- Command::waitForEvents (MQueue, EventImpls, UREvent);
1656
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
1656
1657
MEvent->setHandle (UREvent);
1657
1658
1658
1659
assert (MSrcAllocaCmd && " Expected valid alloca command" );
@@ -1689,11 +1690,11 @@ void UpdateHostRequirementCommand::printDot(std::ostream &Stream) const {
1689
1690
MemCpyCommandHost::MemCpyCommandHost (Requirement SrcReq,
1690
1691
AllocaCommandBase *SrcAllocaCmd,
1691
1692
Requirement DstReq, void **DstPtr,
1692
- QueueImplPtr SrcQueue,
1693
- QueueImplPtr DstQueue)
1694
- : Command(CommandType::COPY_MEMORY, std::move(DstQueue) ),
1695
- MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)),
1696
- MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) {
1693
+ queue_impl * SrcQueue, queue_impl *DstQueue)
1694
+ : Command(CommandType::COPY_MEMORY, DstQueue),
1695
+ MSrcQueue(SrcQueue ? SrcQueue-> shared_from_this () : nullptr ),
1696
+ MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd ),
1697
+ MDstReq(std::move(DstReq)), MDstPtr(DstPtr) {
1697
1698
if (MSrcQueue) {
1698
1699
MEvent->setContextImpl (MSrcQueue->getContextImplPtr ());
1699
1700
}
@@ -1735,7 +1736,7 @@ ContextImplPtr MemCpyCommandHost::getWorkerContext() const {
1735
1736
}
1736
1737
1737
1738
ur_result_t MemCpyCommandHost::enqueueImp () {
1738
- const QueueImplPtr & Queue = MWorkerQueue;
1739
+ queue_impl * Queue = MWorkerQueue. get () ;
1739
1740
waitForPreparedHostEvents ();
1740
1741
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
1741
1742
std::vector<ur_event_handle_t > RawEvents = getUrEvents (EventImpls);
@@ -1774,7 +1775,7 @@ EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) {
1774
1775
ur_result_t EmptyCommand::enqueueImp () {
1775
1776
waitForPreparedHostEvents ();
1776
1777
ur_event_handle_t UREvent = nullptr ;
1777
- waitForEvents (MQueue, MPreparedDepsEvents, UREvent);
1778
+ waitForEvents (MQueue. get () , MPreparedDepsEvents, UREvent);
1778
1779
MEvent->setHandle (UREvent);
1779
1780
return UR_RESULT_SUCCESS;
1780
1781
}
@@ -1858,9 +1859,9 @@ void MemCpyCommandHost::printDot(std::ostream &Stream) const {
1858
1859
}
1859
1860
1860
1861
UpdateHostRequirementCommand::UpdateHostRequirementCommand (
1861
- QueueImplPtr Queue, Requirement Req, AllocaCommandBase *SrcAllocaCmd,
1862
+ queue_impl * Queue, Requirement Req, AllocaCommandBase *SrcAllocaCmd,
1862
1863
void **DstPtr)
1863
- : Command(CommandType::UPDATE_REQUIREMENT, std::move( Queue) ),
1864
+ : Command(CommandType::UPDATE_REQUIREMENT, Queue),
1864
1865
MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(Req)), MDstPtr(DstPtr) {
1865
1866
1866
1867
emitInstrumentationDataProxy ();
@@ -1956,11 +1957,10 @@ static std::string_view cgTypeToString(detail::CGType Type) {
1956
1957
}
1957
1958
1958
1959
ExecCGCommand::ExecCGCommand (
1959
- std::unique_ptr<detail::CG> CommandGroup, QueueImplPtr Queue,
1960
+ std::unique_ptr<detail::CG> CommandGroup, queue_impl * Queue,
1960
1961
bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer,
1961
1962
const std::vector<ur_exp_command_buffer_sync_point_t > &Dependencies)
1962
- : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer,
1963
- Dependencies),
1963
+ : Command(CommandType::RUN_CG, Queue, CommandBuffer, Dependencies),
1964
1964
MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) {
1965
1965
if (MCommandGroup->getType () == detail::CGType::CodeplayHostTask) {
1966
1966
MEvent->setSubmittedQueue (
@@ -2777,20 +2777,18 @@ void enqueueImpKernel(
2777
2777
}
2778
2778
}
2779
2779
2780
- ur_result_t enqueueReadWriteHostPipe (const QueueImplPtr &Queue,
2780
+ ur_result_t enqueueReadWriteHostPipe (queue_impl &Queue,
2781
2781
const std::string &PipeName, bool blocking,
2782
2782
void *ptr, size_t size,
2783
2783
std::vector<ur_event_handle_t > &RawEvents,
2784
2784
detail::event_impl *OutEventImpl,
2785
2785
bool read) {
2786
- assert (Queue &&
2787
- " ReadWrite host pipe submissions should have an associated queue" );
2788
2786
detail::HostPipeMapEntry *hostPipeEntry =
2789
2787
ProgramManager::getInstance ().getHostPipeEntry (PipeName);
2790
2788
2791
2789
ur_program_handle_t Program = nullptr ;
2792
- device Device = Queue-> get_device ();
2793
- ContextImplPtr ContextImpl = Queue-> getContextImplPtr ();
2790
+ device Device = Queue. get_device ();
2791
+ ContextImplPtr ContextImpl = Queue. getContextImplPtr ();
2794
2792
std::optional<ur_program_handle_t > CachedProgram =
2795
2793
ContextImpl->getProgramForHostPipe (Device, hostPipeEntry);
2796
2794
if (CachedProgram)
@@ -2799,17 +2797,16 @@ ur_result_t enqueueReadWriteHostPipe(const QueueImplPtr &Queue,
2799
2797
// If there was no cached program, build one.
2800
2798
device_image_plain devImgPlain =
2801
2799
ProgramManager::getInstance ().getDeviceImageFromBinaryImage (
2802
- hostPipeEntry->getDevBinImage (), Queue->get_context (),
2803
- Queue->get_device ());
2800
+ hostPipeEntry->getDevBinImage (), Queue.get_context (), Device);
2804
2801
device_image_plain BuiltImage = ProgramManager::getInstance ().build (
2805
2802
std::move (devImgPlain), {std::move (Device)}, {});
2806
2803
Program = getSyclObjImpl (BuiltImage)->get_ur_program_ref ();
2807
2804
}
2808
2805
assert (Program && " Program for this hostpipe is not compiled." );
2809
2806
2810
- const AdapterPtr &Adapter = Queue-> getAdapter ();
2807
+ const AdapterPtr &Adapter = Queue. getAdapter ();
2811
2808
2812
- ur_queue_handle_t ur_q = Queue-> getHandleRef ();
2809
+ ur_queue_handle_t ur_q = Queue. getHandleRef ();
2813
2810
ur_result_t Error;
2814
2811
2815
2812
ur_event_handle_t UREvent = nullptr ;
@@ -3667,7 +3664,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
3667
3664
if (!EventImpl) {
3668
3665
EventImpl = MEvent.get ();
3669
3666
}
3670
- return enqueueReadWriteHostPipe (MQueue, pipeName, blocking, hostPtr,
3667
+ return enqueueReadWriteHostPipe (* MQueue, pipeName, blocking, hostPtr,
3671
3668
typeSize, RawEvents, EventImpl, read);
3672
3669
}
3673
3670
case CGType::ExecCommandBuffer: {
@@ -3802,7 +3799,7 @@ bool ExecCGCommand::readyForCleanup() const {
3802
3799
}
3803
3800
3804
3801
UpdateCommandBufferCommand::UpdateCommandBufferCommand (
3805
- QueueImplPtr Queue,
3802
+ queue_impl * Queue,
3806
3803
ext::oneapi::experimental::detail::exec_graph_impl *Graph,
3807
3804
std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
3808
3805
Nodes)
@@ -3813,7 +3810,7 @@ ur_result_t UpdateCommandBufferCommand::enqueueImp() {
3813
3810
waitForPreparedHostEvents ();
3814
3811
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
3815
3812
ur_event_handle_t UREvent = nullptr ;
3816
- Command::waitForEvents (MQueue, EventImpls, UREvent);
3813
+ Command::waitForEvents (MQueue. get () , EventImpls, UREvent);
3817
3814
MEvent->setHandle (UREvent);
3818
3815
3819
3816
auto CheckAndFindAlloca = [](Requirement *Req, const DepDesc &Dep) {
@@ -3885,6 +3882,15 @@ void UpdateCommandBufferCommand::printDot(std::ostream &Stream) const {
3885
3882
void UpdateCommandBufferCommand::emitInstrumentationData () {}
3886
3883
bool UpdateCommandBufferCommand::producesPiEvent () const { return false ; }
3887
3884
3885
+ CGHostTask::CGHostTask (std::shared_ptr<HostTask> HostTask,
3886
+ detail::queue_impl *Queue,
3887
+ std::shared_ptr<detail::context_impl> Context,
3888
+ std::vector<ArgDesc> Args, CG::StorageInitHelper CGData,
3889
+ CGType Type, detail::code_location loc)
3890
+ : CG(Type, std::move(CGData), std::move(loc)),
3891
+ MHostTask(std::move(HostTask)),
3892
+ MQueue(Queue ? Queue->shared_from_this () : nullptr), MContext(Context),
3893
+ MArgs(std::move(Args)) {}
3888
3894
} // namespace detail
3889
3895
} // namespace _V1
3890
3896
} // namespace sycl
0 commit comments