From a6330e312783516cd5518fc5dd18f755d77548de Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Tue, 30 Sep 2025 13:28:23 +0000 Subject: [PATCH 1/2] [SYCL] Graph recording support for handler-less kernel submission path --- sycl/source/detail/queue_impl.cpp | 60 +++++++++++++++++++ sycl/source/detail/queue_impl.hpp | 6 ++ sycl/source/handler.cpp | 50 +--------------- .../Extensions/CommandGraph/CommandGraph.cpp | 3 - 4 files changed, 68 insertions(+), 51 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 79769d8819000..21c7a8e6a67d1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -420,6 +420,61 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, return EventImpl; } +EventImplPtr queue_impl::submit_command_to_graph( + ext::oneapi::experimental::detail::graph_impl &GraphImpl, + std::unique_ptr &CommandGroup, sycl::detail::CGType CGType, + sycl::ext::oneapi::experimental::node_type UserFacingNodeType) { + auto EventImpl = detail::event_impl::create_completed_host_event(); + EventImpl->setSubmittedQueue(weak_from_this()); + ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr; + + // GraphImpl is read and written in this scope so we lock this graph + // with full priviledges. + ext::oneapi::experimental::detail::graph_impl::WriteLock Lock( + GraphImpl.MMutex); + + ext::oneapi::experimental::node_type NodeType = + UserFacingNodeType != ext::oneapi::experimental::node_type::empty + ? UserFacingNodeType + : ext::oneapi::experimental::detail::getNodeTypeFromCG(CGType); + + // Create a new node in the graph representing this command-group + if (isInOrder()) { + // In-order queues create implicit linear dependencies between nodes. + // Find the last node added to the graph from this queue, so our new + // node can set it as a predecessor. + std::vector Deps; + if (ext::oneapi::experimental::detail::node_impl *DependentNode = + GraphImpl.getLastInorderNode(this)) { + Deps.push_back(DependentNode); + } + NodeImpl = &GraphImpl.add(NodeType, std::move(CommandGroup), Deps); + + // If we are recording an in-order queue remember the new node, so it + // can be used as a dependency for any more nodes recorded from this + // queue. + GraphImpl.setLastInorderNode(*this, *NodeImpl); + } else { + ext::oneapi::experimental::detail::node_impl *LastBarrierRecordedFromQueue = + GraphImpl.getBarrierDep(weak_from_this()); + std::vector Deps; + + if (LastBarrierRecordedFromQueue) { + Deps.push_back(LastBarrierRecordedFromQueue); + } + NodeImpl = &GraphImpl.add(NodeType, std::move(CommandGroup), Deps); + + if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { + GraphImpl.setBarrierDep(weak_from_this(), *NodeImpl); + } + } + + // Associate an event with this new node and return the event. + GraphImpl.addEventForNode(EventImpl, *NodeImpl); + + return EventImpl; +} + detail::EventImplPtr queue_impl::submit_kernel_direct_impl( const NDRDescT &NDRDesc, std::shared_ptr &HostKernel, @@ -454,6 +509,11 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl( CodeLoc)); CommandGroup->MIsTopCodeLoc = IsTopCodeLoc; + if (auto GraphImpl = getCommandGraph(); GraphImpl) { + return submit_command_to_graph(*GraphImpl, CommandGroup, + detail::CGType::Kernel); + } + return detail::Scheduler::getInstance().addCG(std::move(CommandGroup), *this, true); }; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index c3d6748695423..1627cdb5ae261 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -626,6 +626,12 @@ class queue_impl : public std::enable_shared_from_this { bool hasCommandGraph() const { return !MGraph.expired(); } + EventImplPtr submit_command_to_graph( + ext::oneapi::experimental::detail::graph_impl &GraphImpl, + std::unique_ptr &CommandGroup, sycl::detail::CGType CGType, + sycl::ext::oneapi::experimental::node_type UserFacingNodeType = + ext::oneapi::experimental::node_type::empty); + unsigned long long getQueueID() { return MQueueID; } void *getTraceEvent() { return MTraceEvent; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ffa7d80eda4d0..280ad3927b680 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -955,54 +955,8 @@ event handler::finalize() { // If the queue has an associated graph then we need to take the CG and pass // it to the graph to create a node, rather than submit it to the scheduler. if (auto GraphImpl = Queue->getCommandGraph(); GraphImpl) { - auto EventImpl = detail::event_impl::create_completed_host_event(); - EventImpl->setSubmittedQueue(Queue->weak_from_this()); - ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr; - - // GraphImpl is read and written in this scope so we lock this graph - // with full priviledges. - ext::oneapi::experimental::detail::graph_impl::WriteLock Lock( - GraphImpl->MMutex); - - ext::oneapi::experimental::node_type NodeType = - impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty - ? impl->MUserFacingNodeType - : ext::oneapi::experimental::detail::getNodeTypeFromCG(getType()); - - // Create a new node in the graph representing this command-group - if (Queue->isInOrder()) { - // In-order queues create implicit linear dependencies between nodes. - // Find the last node added to the graph from this queue, so our new - // node can set it as a predecessor. - std::vector Deps; - if (ext::oneapi::experimental::detail::node_impl *DependentNode = - GraphImpl->getLastInorderNode(Queue)) { - Deps.push_back(DependentNode); - } - NodeImpl = &GraphImpl->add(NodeType, std::move(CommandGroup), Deps); - - // If we are recording an in-order queue remember the new node, so it - // can be used as a dependency for any more nodes recorded from this - // queue. - GraphImpl->setLastInorderNode(*Queue, *NodeImpl); - } else { - ext::oneapi::experimental::detail::node_impl - *LastBarrierRecordedFromQueue = - GraphImpl->getBarrierDep(Queue->weak_from_this()); - std::vector Deps; - - if (LastBarrierRecordedFromQueue) { - Deps.push_back(LastBarrierRecordedFromQueue); - } - NodeImpl = &GraphImpl->add(NodeType, std::move(CommandGroup), Deps); - - if (NodeImpl->MCGType == sycl::detail::CGType::Barrier) { - GraphImpl->setBarrierDep(Queue->weak_from_this(), *NodeImpl); - } - } - - // Associate an event with this new node and return the event. - GraphImpl->addEventForNode(EventImpl, *NodeImpl); + auto EventImpl = Queue->submit_command_to_graph( + *GraphImpl, CommandGroup, type, impl->MUserFacingNodeType); #ifdef __INTEL_PREVIEW_BREAKING_CHANGES return EventImpl; diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index f8cb84900d4f8..dedd4ebbcb407 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -626,8 +626,6 @@ TEST_F(CommandGraphTest, AccessorModeEdges) { // Tests the transitive queue recording behaviour with queue shortcuts. TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { -// Graphs not supported yet for the no-handler submit path -#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT device Dev; context Ctx{{Dev}}; queue Q1{Ctx, Dev}; @@ -671,7 +669,6 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { ext::oneapi::experimental::queue_state::executing); ASSERT_EQ(Q3.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::executing); -#endif } // Tests that dynamic_work_group_memory.get() will throw on the host side. From 74b007d778702f39cc9a8addf0ca9804937452a2 Mon Sep 17 00:00:00 2001 From: "Ptak, Slawomir" Date: Wed, 1 Oct 2025 08:12:01 +0000 Subject: [PATCH 2/2] Address review comment --- sycl/source/detail/queue_impl.cpp | 4 ++-- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/handler.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 21c7a8e6a67d1..46b237ee5af83 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -422,7 +422,7 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, EventImplPtr queue_impl::submit_command_to_graph( ext::oneapi::experimental::detail::graph_impl &GraphImpl, - std::unique_ptr &CommandGroup, sycl::detail::CGType CGType, + std::unique_ptr CommandGroup, sycl::detail::CGType CGType, sycl::ext::oneapi::experimental::node_type UserFacingNodeType) { auto EventImpl = detail::event_impl::create_completed_host_event(); EventImpl->setSubmittedQueue(weak_from_this()); @@ -510,7 +510,7 @@ detail::EventImplPtr queue_impl::submit_kernel_direct_impl( CommandGroup->MIsTopCodeLoc = IsTopCodeLoc; if (auto GraphImpl = getCommandGraph(); GraphImpl) { - return submit_command_to_graph(*GraphImpl, CommandGroup, + return submit_command_to_graph(*GraphImpl, std::move(CommandGroup), detail::CGType::Kernel); } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 1627cdb5ae261..e9bc77e38df92 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -628,7 +628,7 @@ class queue_impl : public std::enable_shared_from_this { EventImplPtr submit_command_to_graph( ext::oneapi::experimental::detail::graph_impl &GraphImpl, - std::unique_ptr &CommandGroup, sycl::detail::CGType CGType, + std::unique_ptr CommandGroup, sycl::detail::CGType CGType, sycl::ext::oneapi::experimental::node_type UserFacingNodeType = ext::oneapi::experimental::node_type::empty); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 280ad3927b680..f1502406a0431 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -956,7 +956,7 @@ event handler::finalize() { // it to the graph to create a node, rather than submit it to the scheduler. if (auto GraphImpl = Queue->getCommandGraph(); GraphImpl) { auto EventImpl = Queue->submit_command_to_graph( - *GraphImpl, CommandGroup, type, impl->MUserFacingNodeType); + *GraphImpl, std::move(CommandGroup), type, impl->MUserFacingNodeType); #ifdef __INTEL_PREVIEW_BREAKING_CHANGES return EventImpl;