diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 20bf85f1b61ca..05fea6cb05823 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -3,6 +3,10 @@ #2. Use AddLLVM to modify the build and access config options #cmake_policy(SET CMP0057 NEW) #include(AddLLVM) + +set(CMAKE_BUILD_TYPE Debug) + + include(CheckLinkerFlag) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/version.rc.in diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 4e5899df6bfb1..4064cae63e24e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -25,6 +25,10 @@ #include #endif +#ifdef _WIN32 +#include +#endif + namespace sycl { inline namespace _V1 { namespace detail { @@ -43,7 +47,12 @@ void event_impl::initContextIfNeeded() { } event_impl::~event_impl() { + if (MHasBeenReleased) + std::cout << "~event_impl MHasBeenReleased == true" << std::endl; try { + // __debugbreak(); // CP + MHasBeenReleased = true; + // std::cout << "~event_impl: " << (unsigned long)this << std::endl; // changes the timing auto Handle = this->getHandle(); if (Handle) getAdapter()->call(Handle); @@ -52,7 +61,14 @@ event_impl::~event_impl() { } } + void event_impl::waitInternal(bool *Success) { + // CP -- this does not trip + if(MHasBeenReleased){ + std::cout << "waitInternal HasBeenReleased : " << (unsigned long)this << std::endl; + __debugbreak(); + } + auto Handle = this->getHandle(); if (!MIsHostEvent && Handle) { // Wait for the native event @@ -246,6 +262,13 @@ void event_impl::instrumentationEpilog(void *TelemetryEvent, void event_impl::wait(std::shared_ptr Self, bool *Success) { + // CP -- this trips + if(MHasBeenReleased) { + std::cout << "wait HasBeenRelease: " << (unsigned long)this << std::endl; + __debugbreak(); + } + return; + if (MState == HES_Discarded) throw sycl::exception(make_error_code(errc::invalid), "wait method cannot be used for a discarded event."); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 0297ebfd3e4d3..c4dfb14632dba 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -67,6 +67,12 @@ class event_impl { /// \param SyclContext is an instance of SYCL context. event_impl(ur_event_handle_t Event, const context &SyclContext); event_impl(const QueueImplPtr &Queue); + + // delete the copy constructors and assignment operators + event_impl(const event_impl&) = delete; + event_impl &operator=(const event_impl&) = delete; + event_impl(event_impl&&) = delete; + event_impl &operator=(event_impl&&) = delete; /// Sets a queue associated with the event /// @@ -347,7 +353,7 @@ class event_impl { // queue and command, as well as the fact that it is not in enqueued state. return MEvent && MQueue.expired() && !MIsEnqueued && !MCommand; } - +bool MHasBeenReleased = false; protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -433,6 +439,7 @@ class event_impl { // MEvent is lazily created in first ur handle query. bool MIsDefaultConstructed = false; bool MIsHostEvent = false; + }; } // namespace detail diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index a650fb59fcc26..b868807a6051c 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -887,19 +887,19 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, size_t Len, void *DstMem, std::vector DepEvents, ur_event_handle_t *OutEvent) { - assert(SrcQueue && "USM copy must be called with a valid device queue"); - if (!Len) { // no-op, but ensure DepEvents will still be waited on - if (!DepEvents.empty()) { - SrcQueue->getAdapter()->call( - SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(), - OutEvent); - } - return; - } - - if (!SrcMem || !DstMem) - throw exception(make_error_code(errc::invalid), - "NULL pointer argument in memory copy operation."); + // assert(SrcQueue && "USM copy must be called with a valid device queue"); + // if (!Len) { // no-op, but ensure DepEvents will still be waited on + // if (!DepEvents.empty()) { + // SrcQueue->getAdapter()->call( + // SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(), + // OutEvent); + // } + // return; + // } + // + // if (!SrcMem || !DstMem) + // throw exception(make_error_code(errc::invalid), + // "NULL pointer argument in memory copy operation."); const AdapterPtr &Adapter = SrcQueue->getAdapter(); Adapter->call(SrcQueue->getHandleRef(), diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 1a1877e391117..dabae4632086f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -456,61 +456,74 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, MemOpArgTs... MemOpArgs) { // We need to submit command and update the last event under same lock if we // have in-order queue. - { - std::unique_lock Lock(MMutex, std::defer_lock); + // { + // std::unique_lock Lock(MMutex, std::defer_lock); - std::vector MutableDepEvents; - const std::vector &ExpandedDepEvents = - getExtendDependencyList(DepEvents, MutableDepEvents, Lock); + // std::vector MutableDepEvents; + // const std::vector &ExpandedDepEvents = getExtendDependencyList(DepEvents, MutableDepEvents, Lock); - MEmpty = false; + // MEmpty = false; // If we have a command graph set we need to capture the op through the // handler rather than by-passing the scheduler. - if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( - ExpandedDepEvents, MContext)) { - auto isNoEventsMode = trySwitchingToNoEventsMode(); - if (!CallerNeedsEvent && isNoEventsMode) { - NestedCallsTracker tracker; - MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), - /*PiEvent*/ nullptr); - - return createDiscardedEvent(); - } - - event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); + // if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( + // ExpandedDepEvents, MContext)) { + // auto isNoEventsMode = trySwitchingToNoEventsMode(); + // if (!CallerNeedsEvent && isNoEventsMode) { + // NestedCallsTracker tracker; + // MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), + // /*PiEvent*/ nullptr); + // + // return createDiscardedEvent(); + // } + + // event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); + // const auto &EventImpl = detail::getSyclObjImpl(ResEvent); + //if (EventImpl->MHasBeenReleased) { + // std::cout << "submitMemOpHelper before memop MHasBeenReleased == true" << std::endl; + // __debugbreak(); + //} + + // { + // NestedCallsTracker tracker; + ur_event_handle_t UREvent = nullptr; + MemOpFunc(MemOpArgs..., std::vector(), &UREvent); // No failure if this call is not made + // getAdapter()->call_nocheck(1, &UREvent); <- No failure with this line + // MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent); + + // EventImpl->setHandle(UREvent); + // EventImpl->setEnqueued(); + // // connect returned event with dependent events + // if (!isInOrder()) { + // std::vector &ExpandedDepEventImplPtrs = + // EventImpl->getPreparedDepsEvents(); + // ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); + // for (const event &DepEvent : ExpandedDepEvents) + // ExpandedDepEventImplPtrs.push_back( + // detail::getSyclObjImpl(DepEvent)); + // + // // EventImpl is local for current thread, no need to lock. + // EventImpl->cleanDepEventsThroughOneLevelUnlocked(); + // } + // } + + // if (isInOrder() && + // (!isNoEventsMode || MContext->getBackend() == backend::opencl)) { + // auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + // : MExtGraphDeps.LastEventPtr; + // EventToStoreIn = EventImpl; + // } + + event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); const auto &EventImpl = detail::getSyclObjImpl(ResEvent); - { - NestedCallsTracker tracker; - ur_event_handle_t UREvent = nullptr; - MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents), &UREvent); - EventImpl->setHandle(UREvent); - EventImpl->setEnqueued(); - // connect returned event with dependent events - if (!isInOrder()) { - std::vector &ExpandedDepEventImplPtrs = - EventImpl->getPreparedDepsEvents(); - ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); - for (const event &DepEvent : ExpandedDepEvents) - ExpandedDepEventImplPtrs.push_back( - detail::getSyclObjImpl(DepEvent)); - - // EventImpl is local for current thread, no need to lock. - EventImpl->cleanDepEventsThroughOneLevelUnlocked(); - } - } - - if (isInOrder() && - (!isNoEventsMode || MContext->getBackend() == backend::opencl)) { - auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - EventToStoreIn = EventImpl; - } - + if (EventImpl->MHasBeenReleased) { // Surprisingly, even this is tripped sometimes + std::cout << "submitMemOpHelper after memop MHasBeenReleased == true" << std::endl; + __debugbreak(); + } return ResEvent; - } - } - return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc); + // } + // } + // return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc); } void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, diff --git a/unified-runtime/source/CMakeLists.txt b/unified-runtime/source/CMakeLists.txt index f0dd315313e73..228b35eeef98f 100644 --- a/unified-runtime/source/CMakeLists.txt +++ b/unified-runtime/source/CMakeLists.txt @@ -3,6 +3,8 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +set(CMAKE_BUILD_TYPE Debug) + add_definitions(-DUR_VERSION="${PROJECT_VERSION_MAJOR}") add_definitions(-DUR_VALIDATION_LAYER_SUPPORTED_VERSION="${PROJECT_VERSION_MAJOR}") diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index bac059c46f170..c3e4af2064b6a 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -353,6 +353,7 @@ getZeStructureType() { thread_local int32_t ErrorMessageCode = 0; thread_local char ErrorMessage[MaxMessageSize]{}; thread_local int32_t ErrorAdapterNativeCode; +bool ReleaseTracker = true; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *pMessage, int32_t ErrorCode, diff --git a/unified-runtime/source/adapters/level_zero/common.hpp b/unified-runtime/source/adapters/level_zero/common.hpp index 33a1072e217a9..daaffe6aee540 100644 --- a/unified-runtime/source/adapters/level_zero/common.hpp +++ b/unified-runtime/source/adapters/level_zero/common.hpp @@ -381,3 +381,5 @@ extern thread_local int32_t ErrorAdapterNativeCode; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *pMessage, int32_t ErrorCode, int32_t AdapterErrorCode); + +extern bool ReleaseTracker; \ No newline at end of file diff --git a/unified-runtime/source/adapters/level_zero/event.cpp b/unified-runtime/source/adapters/level_zero/event.cpp index f06cae5ec0cb3..2b112adab0443 100644 --- a/unified-runtime/source/adapters/level_zero/event.cpp +++ b/unified-runtime/source/adapters/level_zero/event.cpp @@ -793,7 +793,7 @@ urEventWait(uint32_t NumEvents, // ur_event_handle_t_ *Event = ur_cast(e); if (!Event->hasExternalRefs()) - die("urEventWait must not be called for an internal event"); + die("FIRST urEventWait must not be called for an internal event"); ze_event_handle_t ZeHostVisibleEvent; if (auto Res = Event->getOrCreateHostVisibleEvent(ZeHostVisibleEvent)) @@ -819,7 +819,7 @@ urEventWait(uint32_t NumEvents, { std::shared_lock EventLock(Event->Mutex); if (!Event->hasExternalRefs()) - die("urEventWait must not be called for an internal event"); + die("SECOND urEventWait must not be called for an internal event"); if (!Event->Completed) { auto HostVisibleEvent = Event->HostVisibleEvent; @@ -882,6 +882,11 @@ urEventRetain(/** [in] handle of the event object */ ur_event_handle_t Event) { ur_result_t urEventRelease(/** [in] handle of the event object */ ur_event_handle_t Event) { + if (ReleaseTracker) { + std::cout << "Double release call" << std::endl; + __debugbreak(); + } + ReleaseTracker = true; Event->RefCountExternal--; bool isEventsWaitCompleted = (Event->CommandType == UR_COMMAND_EVENTS_WAIT || diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index edb055f08b202..f3fe7fd094cf2 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1257,6 +1257,7 @@ ur_result_t urEnqueueUSMMemcpy( /// [in,out][optional] return an event object that identifies this /// particular command instance. ur_event_handle_t *OutEvent) { + ReleaseTracker = false; std::scoped_lock lock(Queue->Mutex); return enqueueMemCopyHelper( // TODO: do we need a new command type for this? diff --git a/unified-runtime/source/adapters/level_zero/usm.cpp b/unified-runtime/source/adapters/level_zero/usm.cpp index c6abed7ccabb6..7fa353d6ec214 100644 --- a/unified-runtime/source/adapters/level_zero/usm.cpp +++ b/unified-runtime/source/adapters/level_zero/usm.cpp @@ -730,10 +730,9 @@ enum umf_result_t L0MemoryProvider::free(void *Ptr, size_t Size) { return UMF_RESULT_SUCCESS; } -umf_result_t L0MemoryProvider::GetL0MinPageSize(const void *Mem, - size_t *PageSize) { +umf_result_t L0MemoryProvider::GetL0MinPageSize(void *Mem, size_t *PageSize) { ur_result_t Res = UR_RESULT_SUCCESS; - void *Ptr = const_cast(Mem); + void *Ptr = Mem; if (!Mem) { Res = allocateImpl(&Ptr, 1, 0); @@ -766,8 +765,7 @@ umf_result_t L0MemoryProvider::GetL0MinPageSize(const void *Mem, return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; } -umf_result_t L0MemoryProvider::get_min_page_size(const void *Ptr, - size_t *PageSize) { +umf_result_t L0MemoryProvider::get_min_page_size(void *Ptr, size_t *PageSize) { // Query L0 for min page size. Use provided 'Ptr'. if (Ptr) {