From cfa1a61ccdc1b2b80f403259177e5f96af88bd37 Mon Sep 17 00:00:00 2001 From: Aksel Hjerpbakk Date: Sun, 7 Sep 2025 22:04:18 +0200 Subject: [PATCH] update tracy to 0.12.2 --- libs/tracy/TracyClient.cpp | 4 - libs/tracy/client/TracyCallstack.h | 4 +- libs/tracy/client/TracyLock.hpp | 6 +- libs/tracy/client/TracyProfiler.cpp | 145 ++++--- libs/tracy/client/TracyProfiler.hpp | 1 + libs/tracy/client/TracyRocprof.cpp | 556 --------------------------- libs/tracy/client/TracySysTime.cpp | 14 +- libs/tracy/client/TracySysTrace.hpp | 4 +- libs/tracy/client/tracy_rpmalloc.cpp | 2 +- libs/tracy/common/TracyProtocol.hpp | 3 +- libs/tracy/common/TracyQueue.hpp | 35 +- libs/tracy/common/TracySystem.cpp | 6 +- libs/tracy/common/TracyUwp.hpp | 11 + libs/tracy/common/TracyVersion.hpp | 2 +- libs/tracy/common/TracyWinFamily.hpp | 16 - libs/tracy/tracy/Tracy.hpp | 2 +- libs/tracy/tracy/TracyVulkan.hpp | 10 +- 17 files changed, 102 insertions(+), 719 deletions(-) delete mode 100644 libs/tracy/client/TracyRocprof.cpp create mode 100644 libs/tracy/common/TracyUwp.hpp delete mode 100644 libs/tracy/common/TracyWinFamily.hpp diff --git a/libs/tracy/TracyClient.cpp b/libs/tracy/TracyClient.cpp index e9a0184..6224f48 100644 --- a/libs/tracy/TracyClient.cpp +++ b/libs/tracy/TracyClient.cpp @@ -32,10 +32,6 @@ #include "client/TracyOverride.cpp" #include "client/TracyKCore.cpp" -#ifdef TRACY_ROCPROF -# include "client/TracyRocprof.cpp" -#endif - #if defined(TRACY_HAS_CALLSTACK) # if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 # include "libbacktrace/alloc.cpp" diff --git a/libs/tracy/client/TracyCallstack.h b/libs/tracy/client/TracyCallstack.h index 1aca729..2c7ecad 100644 --- a/libs/tracy/client/TracyCallstack.h +++ b/libs/tracy/client/TracyCallstack.h @@ -8,8 +8,8 @@ # endif # if defined _WIN32 -# include "../common/TracyWinFamily.hpp" -# if !defined TRACY_WIN32_NO_DESKTOP +# include "../common/TracyUwp.hpp" +# ifndef TRACY_UWP # define TRACY_HAS_CALLSTACK 1 # endif # elif defined __ANDROID__ diff --git a/libs/tracy/client/TracyLock.hpp b/libs/tracy/client/TracyLock.hpp index e00b344..d12a3c1 100644 --- a/libs/tracy/client/TracyLock.hpp +++ b/libs/tracy/client/TracyLock.hpp @@ -219,9 +219,8 @@ class Lockable m_ctx.CustomName( name, size ); } - T m_lockable; - private: + T m_lockable; LockableCtx m_ctx; }; @@ -536,9 +535,8 @@ class SharedLockable m_ctx.CustomName( name, size ); } - T m_lockable; - private: + T m_lockable; SharedLockableCtx m_ctx; }; diff --git a/libs/tracy/client/TracyProfiler.cpp b/libs/tracy/client/TracyProfiler.cpp index e1b9d50..2283076 100644 --- a/libs/tracy/client/TracyProfiler.cpp +++ b/libs/tracy/client/TracyProfiler.cpp @@ -9,7 +9,7 @@ # include # include # include -# include "../common/TracyWinFamily.hpp" +# include "../common/TracyUwp.hpp" # ifndef _MSC_VER # include # endif @@ -327,13 +327,7 @@ static inline void CpuId( uint32_t* regs, uint32_t leaf ) static void InitFailure( const char* msg ) { -#if defined TRACY_GDK - const char* format = "Tracy Profiler initialization failure: %s\n"; - const int length = snprintf( nullptr, 0, format, msg ); - char* buffer = (char*)alloca( length + 1 ); - snprintf( buffer, length + 1, format, msg ); - OutputDebugStringA( buffer ); -#elif defined _WIN32 +#if defined _WIN32 bool hasConsole = false; bool reopen = false; const auto attached = AttachConsole( ATTACH_PARENT_PROCESS ); @@ -516,7 +510,7 @@ static const char* GetHostInfo() static char buf[1024]; auto ptr = buf; #if defined _WIN32 -# if defined TRACY_WIN32_NO_DESKTOP +# ifdef TRACY_UWP auto GetVersion = &::GetVersionEx; # else auto GetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlGetVersion" ); @@ -599,7 +593,7 @@ static const char* GetHostInfo() char hostname[512]; gethostname( hostname, 512 ); -# if defined TRACY_WIN32_NO_DESKTOP +# ifdef TRACY_UWP const char* user = ""; # else DWORD userSz = UNLEN+1; @@ -810,7 +804,7 @@ static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, return msg; } -#if defined _WIN32 && !defined TRACY_WIN32_NO_DESKTOP && !defined TRACY_NO_CRASH_HANDLER +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER static DWORD s_profilerThreadId = 0; static DWORD s_symbolThreadId = 0; static char s_crashText[1024]; @@ -1171,38 +1165,6 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) } #endif -#ifdef TRACY_HAS_SYSTEM_TRACING -static void StartSystemTracing( int64_t& samplingPeriod ) -{ - assert( s_sysTraceThread == nullptr ); - - // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) - // as it can have significant impact on the size of the traces - const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); - const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); - if( disableSystrace ) - { - TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); - } - else if( SysTraceStart( samplingPeriod ) ) - { - s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); - new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } -} - -static void StopSystemTracing() -{ - if( s_sysTraceThread ) - { - SysTraceStop(); - s_sysTraceThread->~Thread(); - tracy_free( s_sysTraceThread ); - s_sysTraceThread = nullptr; - } -} -#endif enum { QueuePrealloc = 256 * 1024 }; @@ -1556,7 +1518,7 @@ void Profiler::InstallCrashHandler() sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); #endif -#if defined _WIN32 && !defined TRACY_WIN32_NO_DESKTOP && !defined TRACY_NO_CRASH_HANDLER +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only // want to catch unhandled exceptions. m_prevHandler = reinterpret_cast( SetUnhandledExceptionFilter( CrashFilter ) ); @@ -1570,7 +1532,7 @@ void Profiler::InstallCrashHandler() void Profiler::RemoveCrashHandler() { -#if defined _WIN32 && !defined TRACY_WIN32_NO_DESKTOP && !defined TRACY_NO_CRASH_HANDLER +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER if( m_crashHandlerInstalled ) { auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); @@ -1601,7 +1563,20 @@ void Profiler::RemoveCrashHandler() void Profiler::SpawnWorkerThreads() { #ifdef TRACY_HAS_SYSTEM_TRACING - StartSystemTracing( m_samplingPeriod ); + // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) + // as it can have significant impact on the size of the traces + const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); + const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); + if( disableSystrace ) + { + TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); + } + else if( SysTraceStart( m_samplingPeriod ) ) + { + s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); + std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); + } #endif s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); @@ -1617,7 +1592,7 @@ void Profiler::SpawnWorkerThreads() new(s_symbolThread) Thread( LaunchSymbolWorker, this ); #endif -#if defined _WIN32 && !defined TRACY_WIN32_NO_DESKTOP && !defined TRACY_NO_CRASH_HANDLER +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER s_profilerThreadId = GetThreadId( s_thread->Handle() ); # ifdef TRACY_HAS_CALLSTACK s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); @@ -1638,7 +1613,12 @@ Profiler::~Profiler() RemoveCrashHandler(); #ifdef TRACY_HAS_SYSTEM_TRACING - StopSystemTracing(); + if( s_sysTraceThread ) + { + SysTraceStop(); + s_sysTraceThread->~Thread(); + tracy_free( s_sysTraceThread ); + } #endif #ifdef TRACY_HAS_CALLSTACK @@ -1791,6 +1771,7 @@ void Profiler::Worker() MemWrite( &welcome.timerMul, m_timerMul ); MemWrite( &welcome.initBegin, GetInitTime() ); MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) ); + MemWrite( &welcome.delay, m_delay ); MemWrite( &welcome.resolution, m_resolution ); MemWrite( &welcome.epoch, m_epoch ); MemWrite( &welcome.exectime, m_exectime ); @@ -2030,6 +2011,7 @@ void Profiler::Worker() } else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty ) { + if( ShouldExit() ) break; if( m_bufferOffset != m_bufferStart ) { if( !CommitData() ) break; @@ -2060,7 +2042,7 @@ void Profiler::Worker() connActive = HandleServerQuery(); if( !connActive ) break; } - if( !connActive || ShouldExit() ) break; + if( !connActive ) break; } if( ShouldExit() ) break; @@ -2126,13 +2108,7 @@ void Profiler::Worker() while( s_symbolThreadGone.load() == false ) { YieldThread(); } #endif - // Client is exiting. -#ifdef TRACY_HAS_SYSTEM_TRACING - // Stop filling queues with new data. - StopSystemTracing(); -#endif - - // Send items remaining in queues. + // Client is exiting. Send items remaining in queues. for(;;) { const auto status = Dequeue( token ); @@ -2383,10 +2359,6 @@ static void FreeAssociatedMemory( const QueueItem& item ) tracy_free( (void*)ptr ); break; #endif - case QueueType::GpuAnnotationName: - ptr = MemRead( &item.gpuAnnotationNameFat.ptr ); - tracy_free( (void*)ptr ); - break; #ifdef TRACY_ON_DEMAND case QueueType::MessageAppInfo: case QueueType::GpuContextName: @@ -2602,12 +2574,6 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) tracy_free_fast( (void*)ptr ); #endif break; - case QueueType::GpuAnnotationName: - ptr = MemRead( &item->gpuAnnotationNameFat.ptr ); - size = MemRead( &item->gpuAnnotationNameFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; case QueueType::PlotDataInt: case QueueType::PlotDataFloat: case QueueType::PlotDataDouble: @@ -2966,14 +2932,6 @@ Profiler::DequeueStatus Profiler::DequeueSerial() #endif break; } - case QueueType::GpuAnnotationName: - { - ptr = MemRead( &item->gpuAnnotationNameFat.ptr ); - uint16_t size = MemRead( &item->gpuAnnotationNameFat.size ); - SendSingleString( (const char*)ptr, size ); - tracy_free_fast( (void*)ptr ); - break; - } #ifdef TRACY_FIBERS case QueueType::ZoneBegin: case QueueType::ZoneBeginCallstack: @@ -3853,6 +3811,43 @@ void Profiler::CalibrateDelay() if( dti > 0 && dti < mindiff ) mindiff = dti; } m_resolution = mindiff; + +#ifdef TRACY_DELAYED_INIT + m_delay = m_resolution; +#else + constexpr int Events = Iterations * 2; // start + end + static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); + + static const tracy::SourceLocationData __tracy_source_location { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; + const auto t0 = GetTime(); + for( int i=0; izoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location ); + TracyLfqCommit; + } + { + TracyLfqPrepare( QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.time, GetTime() ); + TracyLfqCommit; + } + } + const auto t1 = GetTime(); + const auto dt = t1 - t0; + m_delay = dt / Events; + + moodycamel::ConsumerToken token( GetQueue() ); + int left = Events; + while( left != 0 ) + { + const auto sz = GetQueue().try_dequeue_bulk_single( token, [](const uint64_t&){}, [](QueueItem* item, size_t sz){} ); + assert( sz > 0 ); + left -= (int)sz; + } + assert( GetQueue().size_approx() == 0 ); +#endif } void Profiler::ReportTopology() @@ -3867,7 +3862,7 @@ void Profiler::ReportTopology() }; #if defined _WIN32 -# if defined TRACY_WIN32_NO_DESKTOP +# ifdef TRACY_UWP t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = &::GetLogicalProcessorInformationEx; # else t_GetLogicalProcessorInformationEx _GetLogicalProcessorInformationEx = (t_GetLogicalProcessorInformationEx)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetLogicalProcessorInformationEx" ); diff --git a/libs/tracy/client/TracyProfiler.hpp b/libs/tracy/client/TracyProfiler.hpp index e773f5e..8d16905 100644 --- a/libs/tracy/client/TracyProfiler.hpp +++ b/libs/tracy/client/TracyProfiler.hpp @@ -991,6 +991,7 @@ class Profiler double m_timerMul; uint64_t m_resolution; + uint64_t m_delay; std::atomic m_timeBegin; uint32_t m_mainThread; uint64_t m_epoch, m_exectime; diff --git a/libs/tracy/client/TracyRocprof.cpp b/libs/tracy/client/TracyRocprof.cpp deleted file mode 100644 index 370e42e..0000000 --- a/libs/tracy/client/TracyRocprof.cpp +++ /dev/null @@ -1,556 +0,0 @@ -#include "../server/tracy_robin_hood.h" -#include "TracyProfiler.hpp" -#include "TracyThread.hpp" -#include "tracy/TracyC.h" -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define ROCPROFILER_CALL( result, msg ) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if( CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS ) \ - { \ - std::string status_msg = rocprofiler_get_status_string( CHECKSTATUS ); \ - std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg << " failed with error code " \ - << CHECKSTATUS << ": " << status_msg << std::endl; \ - std::stringstream errmsg{}; \ - errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" << status_msg \ - << ")"; \ - throw std::runtime_error( errmsg.str() ); \ - } \ - } - -namespace -{ - -using kernel_symbol_data_t = rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t; - -struct DispatchData -{ - int64_t launch_start; - int64_t launch_end; - uint32_t thread_id; - uint16_t query_id; -}; - -struct ToolData -{ - uint32_t version; - const char* runtime_version; - uint32_t priority; - rocprofiler_client_id_t client_id; - uint8_t context_id; - bool init; - uint64_t query_id; - int64_t previous_cpu_time; - tracy::unordered_map client_kernels; - tracy::unordered_map dispatch_data; - tracy::unordered_set counter_names = { "SQ_WAVES", "GL2C_MISS", "GL2C_HIT" }; - std::unique_ptr cal_thread; - std::mutex mut{}; -}; - -using namespace tracy; - -rocprofiler_context_id_t& get_client_ctx() -{ - static rocprofiler_context_id_t ctx{ 0 }; - return ctx; -} - -const char* CTX_NAME = "rocprofv3"; - -uint8_t gpu_context_allocate( ToolData* data ) -{ - - timespec ts; - clock_gettime( CLOCK_BOOTTIME, &ts ); - uint64_t cpu_timestamp = Profiler::GetTime(); - uint64_t gpu_timestamp = ( (uint64_t)ts.tv_sec * 1000000000 ) + ts.tv_nsec; - float timestamp_period = 1.0f; - data->previous_cpu_time = cpu_timestamp; - - // Allocate the process-unique GPU context ID. There's a max of 255 available; - // if we are recreating devices a lot we may exceed that. Don't do that, or - // wrap around and get weird (but probably still usable) numbers. - uint8_t context_id = tracy::GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ); - if( context_id >= 255 ) - { - context_id %= 255; - } - - uint8_t context_flags = 0; -#ifdef TRACY_ROCPROF_CALIBRATION - // Tell tracy we'll be passing calibrated timestamps and not to mess with - // the times. We'll periodically send GpuCalibration events in case the - // times drift. - context_flags |= tracy::GpuContextCalibration; -#endif - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuNewContext ); - tracy::MemWrite( &item->gpuNewContext.cpuTime, cpu_timestamp ); - tracy::MemWrite( &item->gpuNewContext.gpuTime, gpu_timestamp ); - memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); - tracy::MemWrite( &item->gpuNewContext.period, timestamp_period ); - tracy::MemWrite( &item->gpuNewContext.context, context_id ); - tracy::MemWrite( &item->gpuNewContext.flags, context_flags ); - tracy::MemWrite( &item->gpuNewContext.type, tracy::GpuContextType::Rocprof ); - tracy::Profiler::QueueSerialFinish(); - } - - // Send the name of the context along. - // NOTE: Tracy will unconditionally free the name so we must clone it here. - // Since internally Tracy will use its own rpmalloc implementation we must - // make sure we allocate from the same source. - size_t name_length = strlen( CTX_NAME ); - char* cloned_name = (char*)tracy::tracy_malloc( name_length ); - memcpy( cloned_name, CTX_NAME, name_length ); - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuContextName ); - tracy::MemWrite( &item->gpuContextNameFat.context, context_id ); - tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)cloned_name ); - tracy::MemWrite( &item->gpuContextNameFat.size, name_length ); - tracy::Profiler::QueueSerialFinish(); - } - - return context_id; -} - -uint64_t kernel_src_loc( ToolData* data, uint64_t kernel_id ) -{ - uint64_t src_loc = 0; - auto _lk = std::unique_lock{ data->mut }; - rocprofiler_kernel_id_t kid = kernel_id; - if( data->client_kernels.count( kid ) ) - { - auto& sym_data = data->client_kernels[kid]; - const char* name = sym_data.kernel_name; - size_t name_len = strlen( name ); - uint32_t line = 0; - src_loc = tracy::Profiler::AllocSourceLocation( line, NULL, 0, name, name_len, NULL, 0 ); - } - return src_loc; -} - -void record_interval( ToolData* data, rocprofiler_timestamp_t start_timestamp, rocprofiler_timestamp_t end_timestamp, - uint64_t src_loc, rocprofiler_dispatch_id_t dispatch_id ) -{ - - uint16_t query_id = 0; - uint8_t context_id = data->context_id; - - { - auto _lk = std::unique_lock{ data->mut }; - query_id = data->query_id; - data->query_id++; - if( dispatch_id != UINT64_MAX ) - { - DispatchData& dispatch_data = data->dispatch_data[dispatch_id]; - dispatch_data.query_id = query_id; - dispatch_data.thread_id = tracy::GetThreadHandle(); - } - } - - uint64_t cpu_start_time = 0, cpu_end_time = 0; - if( dispatch_id == UINT64_MAX ) - { - cpu_start_time = tracy::Profiler::GetTime(); - cpu_end_time = tracy::Profiler::GetTime(); - } - else - { - auto _lk = std::unique_lock{ data->mut }; - DispatchData& dispatch_data = data->dispatch_data[dispatch_id]; - cpu_start_time = dispatch_data.launch_start; - cpu_end_time = dispatch_data.launch_end; - } - - if( src_loc != 0 ) - { - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginAllocSrcLocSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)src_loc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id ); - tracy::MemWrite( &item->gpuZoneBegin.context, context_id ); - tracy::Profiler::QueueSerialFinish(); - } - } - else - { - static const ___tracy_source_location_data src_loc = { NULL, NULL, NULL, 0, 0 }; - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneBeginSerial ); - tracy::MemWrite( &item->gpuZoneBegin.cpuTime, cpu_start_time ); - tracy::MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)&src_loc ); - tracy::MemWrite( &item->gpuZoneBegin.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneBegin.queryId, query_id ); - tracy::MemWrite( &item->gpuZoneBegin.context, context_id ); - tracy::Profiler::QueueSerialFinish(); - } - } - - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, start_timestamp ); - tracy::MemWrite( &item->gpuTime.queryId, query_id ); - tracy::MemWrite( &item->gpuTime.context, context_id ); - tracy::Profiler::QueueSerialFinish(); - } - - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneEndSerial ); - tracy::MemWrite( &item->gpuZoneEnd.cpuTime, cpu_end_time ); - tracy::MemWrite( &item->gpuZoneEnd.thread, tracy::GetThreadHandle() ); - tracy::MemWrite( &item->gpuZoneEnd.queryId, query_id ); - tracy::MemWrite( &item->gpuZoneEnd.context, context_id ); - tracy::Profiler::QueueSerialFinish(); - } - - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTime ); - tracy::MemWrite( &item->gpuTime.gpuTime, end_timestamp ); - tracy::MemWrite( &item->gpuTime.queryId, query_id ); - tracy::MemWrite( &item->gpuTime.context, context_id ); - tracy::Profiler::QueueSerialFinish(); - } -} - -void record_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data, - rocprofiler_record_counter_t* record_data, size_t record_count, - rocprofiler_user_data_t /*user_data*/, void* callback_data ) -{ - assert( callback_data != nullptr ); - ToolData* data = static_cast( callback_data ); - if( !data->init ) return; - - std::unordered_map sums; - for( size_t i = 0; i < record_count; ++i ) - { - auto _counter_id = rocprofiler_counter_id_t{}; - ROCPROFILER_CALL( rocprofiler_query_record_counter_id( record_data[i].id, &_counter_id ), - "query record counter id" ); - sums[_counter_id.handle] += record_data[i].counter_value; - } - - uint16_t query_id = 0; - uint32_t thread_id = 0; - { - auto _lk = std::unique_lock{ data->mut }; - // An assumption is made here that the counter values are supplied after the dispatch - // complete callback. - assert( data->dispatch_data.count( dispatch_data.dispatch_info.dispatch_id ) ); - DispatchData& ddata = data->dispatch_data[dispatch_data.dispatch_info.dispatch_id]; - query_id = ddata.query_id; - thread_id = ddata.thread_id; - } - - for( auto& p : sums ) - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuZoneAnnotation ); - tracy::MemWrite( &item->zoneAnnotation.noteId, p.first ); - tracy::MemWrite( &item->zoneAnnotation.queryId, query_id ); - tracy::MemWrite( &item->zoneAnnotation.thread, thread_id ); - tracy::MemWrite( &item->zoneAnnotation.value, p.second ); - tracy::MemWrite( &item->zoneAnnotation.context, data->context_id ); - tracy::Profiler::QueueSerialFinish(); - } -} - -/** - * Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue. - * rocprofiler_counter_config_id_t* is a return to specify what counters to collect - * for this dispatch (dispatch_packet). - */ -void dispatch_callback( rocprofiler_dispatch_counting_service_data_t dispatch_data, - rocprofiler_profile_config_id_t* config, rocprofiler_user_data_t* /*user_data*/, - void* callback_data ) -{ - assert( callback_data != nullptr ); - ToolData* data = static_cast( callback_data ); - if( !data->init ) return; - - /** - * This simple example uses the same profile counter set for all agents. - * We store this in a cache to prevent constructing many identical profile counter - * sets. We first check the cache to see if we have already constructed a counter" - * set for the agent. If we have, return it. Otherwise, construct a new profile counter - * set. - */ - static std::shared_mutex m_mutex = {}; - static std::unordered_map profile_cache = {}; - - auto search_cache = [&]() - { - if( auto pos = profile_cache.find( dispatch_data.dispatch_info.agent_id.handle ); pos != profile_cache.end() ) - { - *config = pos->second; - return true; - } - return false; - }; - - { - auto rlock = std::shared_lock{ m_mutex }; - if( search_cache() ) return; - } - - auto wlock = std::unique_lock{ m_mutex }; - if( search_cache() ) return; - - // GPU Counter IDs - std::vector gpu_counters; - - // Iterate through the agents and get the counters available on that agent - ROCPROFILER_CALL( - rocprofiler_iterate_agent_supported_counters( - dispatch_data.dispatch_info.agent_id, - []( rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, void* user_data ) - { - std::vector* vec = - static_cast*>( user_data ); - for( size_t i = 0; i < num_counters; i++ ) - { - vec->push_back( counters[i] ); - } - return ROCPROFILER_STATUS_SUCCESS; - }, - static_cast( &gpu_counters ) ), - "Could not fetch supported counters" ); - - std::vector collect_counters; - collect_counters.reserve( data->counter_names.size() ); - // Look for the counters contained in counters_to_collect in gpu_counters - for( auto& counter : gpu_counters ) - { - rocprofiler_counter_info_v0_t info; - ROCPROFILER_CALL( - rocprofiler_query_counter_info( counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast( &info ) ), - "Could not query info" ); - if( data->counter_names.count( std::string( info.name ) ) > 0 ) - { - collect_counters.push_back( counter ); - - size_t name_length = strlen( info.name ); - char* cloned_name = (char*)tracy::tracy_malloc( name_length ); - memcpy( cloned_name, info.name, name_length ); - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuAnnotationName ); - tracy::MemWrite( &item->gpuAnnotationNameFat.context, data->context_id ); - tracy::MemWrite( &item->gpuAnnotationNameFat.noteId, counter.handle ); - tracy::MemWrite( &item->gpuAnnotationNameFat.ptr, (uint64_t)cloned_name ); - tracy::MemWrite( &item->gpuAnnotationNameFat.size, name_length ); - tracy::Profiler::QueueSerialFinish(); - } - } - } - - // Create a colleciton profile for the counters - rocprofiler_profile_config_id_t profile = { .handle = 0 }; - ROCPROFILER_CALL( rocprofiler_create_profile_config( dispatch_data.dispatch_info.agent_id, collect_counters.data(), - collect_counters.size(), &profile ), - "Could not construct profile cfg" ); - - profile_cache.emplace( dispatch_data.dispatch_info.agent_id.handle, profile ); - // Return the profile to collect those counters for this dispatch - *config = profile; -} - -void tool_callback_tracing_callback( rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, - void* callback_data ) -{ - assert( callback_data != nullptr ); - ToolData* data = static_cast( callback_data ); - if( !data->init ) return; - - if( record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && - record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER ) - { - auto* sym_data = static_cast( record.payload ); - - if( record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD ) - { - auto _lk = std::unique_lock{ data->mut }; - data->client_kernels.emplace( sym_data->kernel_id, *sym_data ); - } - else if( record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD ) - { - auto _lk = std::unique_lock{ data->mut }; - data->client_kernels.erase( sym_data->kernel_id ); - } - } - else if( record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH ) - { - auto* rdata = static_cast( record.payload ); - if( record.operation == ROCPROFILER_KERNEL_DISPATCH_ENQUEUE ) - { - if( record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER ) - { - auto _lk = std::unique_lock{ data->mut }; - data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_start = tracy::Profiler::GetTime(); - } - else if( record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT ) - { - auto _lk = std::unique_lock{ data->mut }; - data->dispatch_data[rdata->dispatch_info.dispatch_id].launch_end = tracy::Profiler::GetTime(); - } - } - else if( record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE ) - { - uint64_t src_loc = kernel_src_loc( data, rdata->dispatch_info.kernel_id ); - record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, - rdata->dispatch_info.dispatch_id ); - } - } - else if( record.kind == ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY && - record.operation != ROCPROFILER_MEMORY_COPY_NONE && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT ) - { - auto* rdata = static_cast( record.payload ); - const char* name = nullptr; - switch( record.operation ) - { - case ROCPROFILER_MEMORY_COPY_DEVICE_TO_DEVICE: - name = "DeviceToDeviceCopy"; - break; - case ROCPROFILER_MEMORY_COPY_DEVICE_TO_HOST: - name = "DeviceToHostCopy"; - break; - case ROCPROFILER_MEMORY_COPY_HOST_TO_DEVICE: - name = "HostToDeviceCopy"; - break; - case ROCPROFILER_MEMORY_COPY_HOST_TO_HOST: - name = "HostToHostCopy"; - break; - } - size_t name_len = strlen( name ); - uint64_t src_loc = tracy::Profiler::AllocSourceLocation( 0, NULL, 0, name, name_len, NULL, 0 ); - record_interval( data, rdata->start_timestamp, rdata->end_timestamp, src_loc, UINT64_MAX ); - } -} - -void calibration_thread( void* ptr ) -{ - while( !TracyIsStarted ) - ; - ToolData* data = static_cast( ptr ); - data->context_id = gpu_context_allocate( data ); - const char* user_counters = GetEnvVar( "TRACY_ROCPROF_COUNTERS" ); - if( user_counters ) - { - data->counter_names.clear(); - std::stringstream ss( user_counters ); - std::string counter; - while( std::getline( ss, counter, ',' ) ) data->counter_names.insert( counter ); - } - data->init = true; - -#ifdef TRACY_ROCPROF_CALIBRATION - while( data->init ) - { - sleep( 1 ); - - timespec ts; - // HSA performs a linear interpolation of GPU time to CLOCK_BOOTTIME. However, this is - // subject to network time updates and can drift relative to tracy's clock. - clock_gettime( CLOCK_BOOTTIME, &ts ); - int64_t cpu_timestamp = Profiler::GetTime(); - int64_t gpu_timestamp = ts.tv_nsec + ts.tv_sec * 1e9L; - - if( cpu_timestamp > data->previous_cpu_time ) - { - auto* item = tracy::Profiler::QueueSerial(); - tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuCalibration ); - tracy::MemWrite( &item->gpuCalibration.gpuTime, gpu_timestamp ); - tracy::MemWrite( &item->gpuCalibration.cpuTime, cpu_timestamp ); - tracy::MemWrite( &item->gpuCalibration.cpuDelta, cpu_timestamp - data->previous_cpu_time ); - tracy::MemWrite( &item->gpuCalibration.context, data->context_id ); - tracy::Profiler::QueueSerialFinish(); - data->previous_cpu_time = cpu_timestamp; - } - } -#endif -} - -int tool_init( rocprofiler_client_finalize_t fini_func, void* user_data ) -{ - ToolData* data = static_cast( user_data ); - data->cal_thread = std::make_unique( calibration_thread, data ); - - ROCPROFILER_CALL( rocprofiler_create_context( &get_client_ctx() ), "context creation failed" ); - - ROCPROFILER_CALL( rocprofiler_configure_callback_dispatch_counting_service( get_client_ctx(), dispatch_callback, - user_data, record_callback, user_data ), - "Could not setup counting service" ); - - rocprofiler_tracing_operation_t ops[] = { ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER }; - ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(), - ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, ops, 1, - tool_callback_tracing_callback, user_data ), - "callback tracing service failed to configure" ); - - rocprofiler_tracing_operation_t ops2[] = { ROCPROFILER_KERNEL_DISPATCH_COMPLETE, - ROCPROFILER_KERNEL_DISPATCH_ENQUEUE }; - ROCPROFILER_CALL( - rocprofiler_configure_callback_tracing_service( get_client_ctx(), ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH, - ops2, 2, tool_callback_tracing_callback, user_data ), - "callback tracing service failed to configure" ); - - ROCPROFILER_CALL( rocprofiler_configure_callback_tracing_service( get_client_ctx(), - ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY, nullptr, - 0, tool_callback_tracing_callback, user_data ), - "callback tracing service failed to configure" ); - - ROCPROFILER_CALL( rocprofiler_start_context( get_client_ctx() ), "start context" ); - return 0; -} - -void tool_fini( void* tool_data_v ) -{ - rocprofiler_stop_context( get_client_ctx() ); - - ToolData* data = static_cast( tool_data_v ); - data->init = false; - data->cal_thread.reset(); -} -} - -extern "C" -{ - rocprofiler_tool_configure_result_t* rocprofiler_configure( uint32_t version, const char* runtime_version, - uint32_t priority, rocprofiler_client_id_t* client_id ) - { - // If not the first tool to register, indicate that the tool doesn't want to do anything - if( priority > 0 ) return nullptr; - - // (optional) Provide a name for this tool to rocprofiler - client_id->name = "Tracy"; - - // (optional) create configure data - static ToolData data = ToolData{ version, runtime_version, priority, *client_id, 0, false, 0, 0 }; - - // construct configure result - static auto cfg = rocprofiler_tool_configure_result_t{ sizeof( rocprofiler_tool_configure_result_t ), - &tool_init, &tool_fini, static_cast( &data ) }; - - return &cfg; - } -} diff --git a/libs/tracy/client/TracySysTime.cpp b/libs/tracy/client/TracySysTime.cpp index cf7dd9b..b690a91 100644 --- a/libs/tracy/client/TracySysTime.cpp +++ b/libs/tracy/client/TracySysTime.cpp @@ -4,7 +4,6 @@ # if defined _WIN32 # include -# include "../common/TracyWinFamily.hpp" # elif defined __linux__ # include # include @@ -28,24 +27,13 @@ static inline uint64_t ConvertTime( const FILETIME& t ) void SysTime::ReadTimes() { + FILETIME idleTime; FILETIME kernelTime; FILETIME userTime; -# if defined TRACY_GDK - FILETIME creationTime; - FILETIME exitTime; - - GetProcessTimes( GetCurrentProcess(), &creationTime, &exitTime, &kernelTime, &userTime ); - - idle = 0; -# else - FILETIME idleTime; - GetSystemTimes( &idleTime, &kernelTime, &userTime ); idle = ConvertTime( idleTime ); -# endif - const auto kernel = ConvertTime( kernelTime ); const auto user = ConvertTime( userTime ); used = kernel + user; diff --git a/libs/tracy/client/TracySysTrace.hpp b/libs/tracy/client/TracySysTrace.hpp index 2a28e8b..8c663cd 100644 --- a/libs/tracy/client/TracySysTrace.hpp +++ b/libs/tracy/client/TracySysTrace.hpp @@ -2,8 +2,8 @@ #define __TRACYSYSTRACE_HPP__ #if !defined TRACY_NO_SYSTEM_TRACING && ( defined _WIN32 || defined __linux__ ) -# include "../common/TracyWinFamily.hpp" -# if !defined TRACY_WIN32_NO_DESKTOP +# include "../common/TracyUwp.hpp" +# ifndef TRACY_UWP # define TRACY_HAS_SYSTEM_TRACING # endif #endif diff --git a/libs/tracy/client/tracy_rpmalloc.cpp b/libs/tracy/client/tracy_rpmalloc.cpp index c43b8ca..315a40f 100644 --- a/libs/tracy/client/tracy_rpmalloc.cpp +++ b/libs/tracy/client/tracy_rpmalloc.cpp @@ -2780,7 +2780,7 @@ rpmalloc_initialize_config(const rpmalloc_config_t* config) { _memory_huge_pages = 1; } -#if PLATFORM_WINDOWS && !defined TRACY_GDK +#if PLATFORM_WINDOWS if (_memory_config.enable_huge_pages) { HANDLE token = 0; size_t large_page_minimum = GetLargePageMinimum(); diff --git a/libs/tracy/common/TracyProtocol.hpp b/libs/tracy/common/TracyProtocol.hpp index ff38686..40cf5e6 100644 --- a/libs/tracy/common/TracyProtocol.hpp +++ b/libs/tracy/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 76 }; +enum : uint32_t { ProtocolVersion = 74 }; enum : uint16_t { BroadcastVersion = 3 }; using lz4sz_t = uint32_t; @@ -95,6 +95,7 @@ struct WelcomeMessage double timerMul; int64_t initBegin; int64_t initEnd; + uint64_t delay; uint64_t resolution; uint64_t epoch; uint64_t exectime; diff --git a/libs/tracy/common/TracyQueue.hpp b/libs/tracy/common/TracyQueue.hpp index 765c83c..daef3ec 100644 --- a/libs/tracy/common/TracyQueue.hpp +++ b/libs/tracy/common/TracyQueue.hpp @@ -61,7 +61,6 @@ enum class QueueType : uint8_t ThreadWakeup, GpuTime, GpuContextName, - GpuAnnotationName, CallstackFrameSize, SymbolInformation, ExternalNameMetadata, @@ -112,7 +111,6 @@ enum class QueueType : uint8_t SecondStringData, MemNamePayload, ThreadGroupHint, - GpuZoneAnnotation, StringData, ThreadName, PlotName, @@ -333,7 +331,7 @@ struct QueuePlotDataInt : public QueuePlotDataBase int64_t val; }; -struct QueuePlotDataFloat : public QueuePlotDataBase +struct QueuePlotDataFloat : public QueuePlotDataBase { float val; }; @@ -408,8 +406,7 @@ enum class GpuContextType : uint8_t Direct3D11, Metal, Custom, - CUDA, - Rocprof + CUDA }; enum GpuContextFlags : uint8_t @@ -449,15 +446,6 @@ struct QueueGpuZoneEnd uint8_t context; }; -struct QueueGpuZoneAnnotation -{ - int64_t noteId; - double value; - uint32_t thread; - uint16_t queryId; - uint8_t context; -}; - struct QueueGpuTime { int64_t gpuTime; @@ -479,7 +467,7 @@ struct QueueGpuTimeSync int64_t cpuTime; uint8_t context; }; - + struct QueueGpuContextName { uint8_t context; @@ -491,18 +479,6 @@ struct QueueGpuContextNameFat : public QueueGpuContextName uint16_t size; }; -struct QueueGpuAnnotationName -{ - int64_t noteId; - uint8_t context; -}; - -struct QueueGpuAnnotationNameFat : public QueueGpuAnnotationName -{ - uint64_t ptr; - uint16_t size; -}; - struct QueueMemNamePayload { uint64_t name; @@ -780,8 +756,6 @@ struct QueueItem QueueGpuTimeSync gpuTimeSync; QueueGpuContextName gpuContextName; QueueGpuContextNameFat gpuContextNameFat; - QueueGpuAnnotationName gpuAnnotationName; - QueueGpuAnnotationNameFat gpuAnnotationNameFat; QueueMemAlloc memAlloc; QueueMemFree memFree; QueueMemDiscard memDiscard; @@ -815,7 +789,6 @@ struct QueueItem QueueSourceCodeNotAvailable sourceCodeNotAvailable; QueueFiberEnter fiberEnter; QueueFiberLeave fiberLeave; - QueueGpuZoneAnnotation zoneAnnotation; }; }; #pragma pack( pop ) @@ -876,7 +849,6 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueThreadWakeup ), sizeof( QueueHeader ) + sizeof( QueueGpuTime ), sizeof( QueueHeader ) + sizeof( QueueGpuContextName ), - sizeof( QueueHeader ) + sizeof( QueueGpuAnnotationName ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), sizeof( QueueHeader ) + sizeof( QueueSymbolInformation ), sizeof( QueueHeader ), // ExternalNameMetadata - not for wire transfer @@ -928,7 +900,6 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // second string data sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ), - sizeof( QueueHeader ) + sizeof( QueueGpuZoneAnnotation ), // GPU zone annotation // keep all QueueStringTransfer below sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name diff --git a/libs/tracy/common/TracySystem.cpp b/libs/tracy/common/TracySystem.cpp index 7696ca3..a92a345 100644 --- a/libs/tracy/common/TracySystem.cpp +++ b/libs/tracy/common/TracySystem.cpp @@ -10,7 +10,7 @@ # endif # include # include -# include "TracyWinFamily.hpp" +# include "TracyUwp.hpp" #else # include # include @@ -137,7 +137,7 @@ TRACY_API void SetThreadName( const char* name ) TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ) { #if defined _WIN32 -# if defined TRACY_WIN32_NO_DESKTOP +# ifdef TRACY_UWP static auto _SetThreadDescription = &::SetThreadDescription; # else static auto _SetThreadDescription = (t_SetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "SetThreadDescription" ); @@ -246,7 +246,7 @@ TRACY_API const char* GetThreadName( uint32_t id ) #endif #if defined _WIN32 -# if defined TRACY_WIN32_NO_DESKTOP +# ifdef TRACY_UWP static auto _GetThreadDescription = &::GetThreadDescription; # else static auto _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); diff --git a/libs/tracy/common/TracyUwp.hpp b/libs/tracy/common/TracyUwp.hpp new file mode 100644 index 0000000..7dce96b --- /dev/null +++ b/libs/tracy/common/TracyUwp.hpp @@ -0,0 +1,11 @@ +#ifndef __TRACYUWP_HPP__ +#define __TRACYUWP_HPP__ + +#ifdef _WIN32 +# include +# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +# define TRACY_UWP +# endif +#endif + +#endif diff --git a/libs/tracy/common/TracyVersion.hpp b/libs/tracy/common/TracyVersion.hpp index 7d704c5..93b6737 100644 --- a/libs/tracy/common/TracyVersion.hpp +++ b/libs/tracy/common/TracyVersion.hpp @@ -7,7 +7,7 @@ namespace Version { enum { Major = 0 }; enum { Minor = 12 }; -enum { Patch = 4 }; +enum { Patch = 2 }; } } diff --git a/libs/tracy/common/TracyWinFamily.hpp b/libs/tracy/common/TracyWinFamily.hpp deleted file mode 100644 index b601455..0000000 --- a/libs/tracy/common/TracyWinFamily.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __TRACYWINFAMILY_HPP__ -#define __TRACYWINFAMILY_HPP__ - -#ifdef _WIN32 -# include -# if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -# define TRACY_WIN32_NO_DESKTOP -# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_GAMES) -# define TRACY_GDK -# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) -# define TRACY_UWP -# endif -# endif -#endif - -#endif diff --git a/libs/tracy/tracy/Tracy.hpp b/libs/tracy/tracy/Tracy.hpp index 98957f6..605d149 100644 --- a/libs/tracy/tracy/Tracy.hpp +++ b/libs/tracy/tracy/Tracy.hpp @@ -182,7 +182,7 @@ #define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } #define LockableBase( type ) tracy::Lockable #define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_lock_location_,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &TracyConcat(__tracy_lock_location_,TracyLine) ) +#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##__LINE__ { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##__LINE__ ) #define LockableName( varname, txt, size ) varname.CustomName( txt, size ) #define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) diff --git a/libs/tracy/tracy/TracyVulkan.hpp b/libs/tracy/tracy/TracyVulkan.hpp index 429f299..7264318 100644 --- a/libs/tracy/tracy/TracyVulkan.hpp +++ b/libs/tracy/tracy/TracyVulkan.hpp @@ -16,7 +16,6 @@ #define TracyVkZoneC(c,x,y,z) #define TracyVkZoneTransient(c,x,y,z,w) #define TracyVkCollect(c,x) -#define TracyVkCollectHost(c) #define TracyVkNamedZoneS(c,x,y,z,w,a) #define TracyVkNamedZoneCS(c,x,y,z,w,v,a) @@ -257,9 +256,7 @@ class VkCtx #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) { - cmdbuf ? - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ) : - VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, 0, m_queryCount ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); m_tail = head; m_oldCnt = 0; int64_t tgpu; @@ -328,9 +325,7 @@ class VkCtx } } - cmdbuf ? - VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ) : - VK_FUNCTION_WRAPPER( vkResetQueryPool( m_device, m_query, wrappedTail, cnt ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ); m_tail += cnt; } @@ -726,7 +721,6 @@ using TracyVkCtx = tracy::VkCtx*; # define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active ); #endif #define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); -#define TracyVkCollectHost( ctx ) ctx->Collect( VK_NULL_HANDLE ); #ifdef TRACY_HAS_CALLSTACK # define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active );