diff --git a/examples/nvperf.jl b/examples/nvperf.jl
new file mode 100644
index 0000000000..7421b9ead2
--- /dev/null
+++ b/examples/nvperf.jl
@@ -0,0 +1,21 @@
+using CUDA
+
+NVPERF.initialize()
+CUPTI.initialize_profiler()
+
+avail = CUPTI.counter_availability()
+chip = first(NVPERF.supported_chips())
+
+me = NVPERF.CUDAMetricsEvaluator(chip, avail)
+
+NVPERF.list_metrics(me)
+
+m = NVPERF.Metric(me, "dram__bytes.sum.per_second")
+description, unit = NVPERF.properties(m)
+@show description
+@show string(unit)
+
+@show NVPERF.MetricEvalRequest(me, "dram__bytes.sum.per_second")
+
+# TODO: next, build a counter data image (see CUPTI.CounterDataImage),
+# then set up a range.
diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl
index 6e81f01503..ab9a03a9df 100644
--- a/lib/cupti/wrappers.jl
+++ b/lib/cupti/wrappers.jl
@@ -3,3 +3,135 @@ function version()
     cuptiGetVersion(version_ref)
     VersionNumber(version_ref[])
 end
+
+"""
+    initialize_profiler()
+
+Initialize the CUPTI profiler API by calling `cuptiProfilerInitialize`.
+"""
+function initialize_profiler()
+    params = Ref(CUpti_Profiler_Initialize_Params(
+        CUpti_Profiler_Initialize_Params_STRUCT_SIZE,
+        C_NULL))
+    cuptiProfilerInitialize(params)
+end
+
+"""
+    deinitialize_profiler()
+
+Shut down the CUPTI profiler API by calling `cuptiProfilerDeInitialize`.
+"""
+function deinitialize_profiler()
+    params = Ref(CUpti_Profiler_DeInitialize_Params(
+        CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE,
+        C_NULL))
+    cuptiProfilerDeInitialize(params)
+end
+
+# The original, misspelled name is kept as an alias so existing callers keep working.
+const deinitalize_profiler = deinitialize_profiler
+
+"""
+    counter_availability(ctx = context())
+
+Return the counter availability image for `ctx` as a `Vector{UInt8}`.
+
+`cuptiProfilerGetCounterAvailability` is called twice: first without a buffer to query
+the required size, then with a buffer of that size to receive the image.
+"""
+function counter_availability(ctx = context())
+    # 1. Query size
+    params = Ref(CUpti_Profiler_GetCounterAvailability_Params(
+        CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
+        C_NULL, ctx.handle, 0, C_NULL))
+    cuptiProfilerGetCounterAvailability(params)
+
+    sz = params[].counterAvailabilityImageSize
+    buffer = Vector{UInt8}(undef, sz)
+
+    # 2. Fill the buffer
+    GC.@preserve buffer begin
+        params = Ref(CUpti_Profiler_GetCounterAvailability_Params(
+            CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
+            C_NULL, ctx.handle, sz, pointer(buffer)))
+        cuptiProfilerGetCounterAvailability(params)
+    end
+    return buffer
+end
+
+# Supertype for objects that supply a counter data prefix through `prefix`.
+abstract type CounterDataBuilder end
+
+# `prefix(builder)` must return a buffer supporting `pointer` and `length`; it is
+# consumed by the `CounterDataImage` constructor. No methods are defined here.
+function prefix end
+
+"""
+    CounterDataImage(builder, maxNumRanges, maxNumRangeTreeNodes, maxRangeNameLength)
+
+Counter data image and its scratch buffer, stored as the byte vectors `dataImage` and
+`scratch`.
+
+The constructor sizes and initializes the image from `prefix(builder)` and the three
+limits, then sizes and initializes the scratch buffer for that image.
+"""
+mutable struct CounterDataImage
+    dataImage::Vector{UInt8}
+    scratch::Vector{UInt8}
+end
+
+function CounterDataImage(builder, maxNumRanges, maxNumRangeTreeNodes, maxRangeNameLength)
+    p = prefix(builder)
+    GC.@preserve p begin
+        options = Ref(CUpti_Profiler_CounterDataImageOptions(
+            CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
+            C_NULL, pointer(p), length(p), maxNumRanges, maxNumRangeTreeNodes,
+            maxRangeNameLength))
+
+        GC.@preserve options begin
+            # `pointer` has no method for a `Ref`; convert it explicitly.
+            options_ptr = Base.unsafe_convert(
+                Ptr{CUpti_Profiler_CounterDataImageOptions}, options)
+
+            # 1. Query the size of the counter data image
+            size_params = Ref(CUpti_Profiler_CounterDataImage_CalculateSize_Params(
+                CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE,
+                C_NULL, CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
+                options_ptr, 0))
+            cuptiProfilerCounterDataImageCalculateSize(size_params)
+            sz = size_params[].counterDataImageSize
+
+            # 2. Initialize the counter data image
+            dataImage = Vector{UInt8}(undef, sz)
+            GC.@preserve dataImage begin
+                init_params = Ref(CUpti_Profiler_CounterDataImage_Initialize_Params(
+                    CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE,
+                    C_NULL, CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
+                    options_ptr, sz, pointer(dataImage)))
+                cuptiProfilerCounterDataImageInitialize(init_params)
+            end
+        end
+    end
+
+    # 3. Query the size of the scratch buffer
+    GC.@preserve dataImage begin
+        scratch_size_params = Ref(
+            CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params(
+                CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE,
+                C_NULL, sz, pointer(dataImage), 0))
+        cuptiProfilerCounterDataImageCalculateScratchBufferSize(scratch_size_params)
+        scratch_sz = scratch_size_params[].counterDataScratchBufferSize
+    end
+
+    # 4. Initialize the scratch buffer
+    scratch = Vector{UInt8}(undef, scratch_sz)
+    GC.@preserve dataImage scratch begin
+        scratch_params = Ref(
+            CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params(
+                CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE,
+                C_NULL, sz, pointer(dataImage), scratch_sz, pointer(scratch)))
+        cuptiProfilerCounterDataImageInitializeScratchBuffer(scratch_params)
+    end
+
+    return CounterDataImage(dataImage, scratch)
+end
diff --git a/lib/nvperf/wrappers.jl b/lib/nvperf/wrappers.jl
index 91cfe1ac27..ebbf2ae442 100644
--- a/lib/nvperf/wrappers.jl
+++ b/lib/nvperf/wrappers.jl
@@ -15,3 +15,251 @@ function supported_chips()
     end
     return names
 end
+
+"""
+    scratch_buffer(chipName, counter_availability)
+
+Allocate an uninitialized `Vector{UInt8}` whose length is the scratch buffer size that
+`NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize` reports for `chipName` and the
+`counter_availability` image.
+"""
+function scratch_buffer(chipName, counter_availability)
+    GC.@preserve chipName counter_availability begin
+        params = Ref(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params(
+            NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE,
+            C_NULL, pointer(chipName), pointer(counter_availability), 0
+        ))
+        NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(params)
+        sz = params[].scratchBufferSize
+    end
+    return Vector{UInt8}(undef, sz)
+end
+
+abstract type MetricsEvaluator end
+
+"""
+    CUDAMetricsEvaluator(chip, availability)
+
+Metrics evaluator for `chip`, created with `NVPW_CUDA_MetricsEvaluator_Initialize`.
+
+The object stores the scratch buffer, the counter availability image and the chip name
+it was initialized with, and registers `destroy` as its finalizer.
+"""
+mutable struct CUDAMetricsEvaluator <: MetricsEvaluator
+    handle::Ptr{NVPW_MetricsEvaluator}
+    scratch::Vector{UInt8}
+    availability::Vector{UInt8}
+    chip::String
+
+    function CUDAMetricsEvaluator(chip, availability)
+        scratch = scratch_buffer(chip, availability)
+
+        GC.@preserve chip availability scratch begin
+            params = Ref(NVPW_CUDA_MetricsEvaluator_Initialize_Params(
+                NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE,
+                C_NULL, pointer(scratch), length(scratch), pointer(chip),
+                pointer(availability), C_NULL, 0, C_NULL))
+
+            NVPW_CUDA_MetricsEvaluator_Initialize(params)
+            this = new(params[].pMetricsEvaluator, scratch, availability, chip)
+        end
+        finalizer(destroy, this)
+        return this
+    end
+end
+
+Base.unsafe_convert(::Type{Ptr{NVPW_MetricsEvaluator}}, me::CUDAMetricsEvaluator) = me.handle
+
+"""
+    destroy(me::MetricsEvaluator)
+
+Call `NVPW_MetricsEvaluator_Destroy` on the handle of `me`. `CUDAMetricsEvaluator`
+registers this function as its finalizer.
+"""
+function destroy(me::MetricsEvaluator)
+    GC.@preserve me begin
+        params = Ref(NVPW_MetricsEvaluator_Destroy_Params(
+            NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE,
+            C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me)
+        ))
+        NVPW_MetricsEvaluator_Destroy(params)
+    end
+    return nothing
+end
+
+"""
+    MetricsIterator(me, type)
+
+Iterator over the names of the metrics of kind `type`, as reported for `me` by
+`NVPW_MetricsEvaluator_GetMetricNames`. Iteration yields `String`s.
+"""
+struct MetricsIterator
+    me::MetricsEvaluator
+    type::NVPW_MetricType
+    names::Ptr{Cchar}
+    indices::Ptr{Csize_t}
+    numMetrics::Csize_t
+
+    function MetricsIterator(me, type)
+        GC.@preserve me begin
+            params = Ref(NVPW_MetricsEvaluator_GetMetricNames_Params(
+                NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), type,
+                C_NULL, C_NULL, 0))
+            NVPW_MetricsEvaluator_GetMetricNames(params)
+
+            names = Ptr{Cchar}(params[].pMetricNames)
+            indices = params[].pMetricNameBeginIndices
+
+            return new(me, type, names, indices, params[].numMetrics)
+        end
+    end
+end
+
+# `length` returns an `Int` and `eltype` is defined on the type, as the iteration
+# interface expects.
+Base.length(metrics::MetricsIterator) = Int(metrics.numMetrics)
+Base.eltype(::Type{MetricsIterator}) = String
+
+function Base.iterate(metrics::MetricsIterator, state=1)
+    if state <= metrics.numMetrics
+        # `indices[state]` is the byte offset of the current name within `names`.
+        name = unsafe_string(metrics.names + unsafe_load(metrics.indices, state))
+        return (name, state+1)
+    else
+        return nothing
+    end
+end
+
+"""
+    list_metrics(me::MetricsEvaluator)
+
+Print, using `@show`, the name of every metric of every metric type known to `me`.
+"""
+function list_metrics(me::MetricsEvaluator)
+    for i in 0:(NVPW_METRIC_TYPE__COUNT-1)
+        type = NVPW_MetricType(i)
+
+        for metric in MetricsIterator(me, type)
+            @show metric
+        end
+    end
+end
+
+"""
+    submetrics(me::MetricsEvaluator, type)
+
+Return the submetrics that `NVPW_MetricsEvaluator_GetSupportedSubmetrics` reports for
+metric type `type`. The array wraps the reported buffer; it is not a copy.
+"""
+function submetrics(me::MetricsEvaluator, type)
+    GC.@preserve me begin
+        params = Ref(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params(
+            NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE,
+            C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), type,
+            C_NULL, 0))
+        NVPW_MetricsEvaluator_GetSupportedSubmetrics(params)
+        return unsafe_wrap(Array, params[].pSupportedSubmetrics,
+                           params[].numSupportedSubmetrics)
+    end
+end
+
+# TODO rollup to string
+# TODO submetric to string, e.g. NVPW_SUBMETRIC_PEAK_SUSTAINED => ".peak_sustained"
+
+"""
+    Metric(me::MetricsEvaluator, name)
+
+Look up `name` with `NVPW_MetricsEvaluator_GetMetricTypeAndIndex` and store the
+reported metric type and index together with the evaluator.
+"""
+struct Metric
+    me::MetricsEvaluator
+    type::NVPW_MetricType
+    index::Csize_t
+
+    function Metric(me::MetricsEvaluator, name)
+        GC.@preserve me name begin
+            params = Ref(NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params(
+                NVPW_MetricsEvaluator_GetMetricTypeAndIndex_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me),
+                pointer(name), 0, 0))
+            NVPW_MetricsEvaluator_GetMetricTypeAndIndex(params)
+            return new(me, NVPW_MetricType(params[].metricType), params[].metricIndex)
+        end
+    end
+end
+
+# Hardware unit id paired with the evaluator used to convert it to a name.
+struct HWUnit
+    me::MetricsEvaluator
+    hwUnit::UInt32
+end
+
+function Base.string(u::HWUnit)
+    GC.@preserve u begin
+        params = Ref(NVPW_MetricsEvaluator_HwUnitToString_Params(
+            NVPW_MetricsEvaluator_HwUnitToString_Params_STRUCT_SIZE,
+            C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, u.me), u.hwUnit,
+            C_NULL))
+        NVPW_MetricsEvaluator_HwUnitToString(params)
+        return unsafe_string(params[].pHwUnitName)
+    end
+end
+
+"""
+    properties(m::Metric)
+
+Return `(; description, unit)` for a counter or ratio metric, where `unit` is an
+`HWUnit`. Any other metric type raises an error.
+"""
+function properties(m::Metric)
+    if m.type == NVPW_METRIC_TYPE_COUNTER
+        GC.@preserve m begin
+            params = Ref(NVPW_MetricsEvaluator_GetCounterProperties_Params(
+                NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, m.me), m.index,
+                C_NULL, 0))
+            NVPW_MetricsEvaluator_GetCounterProperties(params)
+            description = unsafe_string(params[].pDescription)
+            hwUnit = params[].hwUnit
+            return (; description, unit=HWUnit(m.me, hwUnit))
+        end
+    elseif m.type == NVPW_METRIC_TYPE_RATIO
+        GC.@preserve m begin
+            params = Ref(NVPW_MetricsEvaluator_GetRatioMetricProperties_Params(
+                NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, m.me), m.index,
+                C_NULL, 0))
+            NVPW_MetricsEvaluator_GetRatioMetricProperties(params)
+            description = unsafe_string(params[].pDescription)
+            hwUnit = params[].hwUnit
+            return (; description, unit=HWUnit(m.me, hwUnit))
+        end
+    else
+        error("Not implemented for $(m.type)")
+    end
+end
+
+"""
+    MetricEvalRequest(me::MetricsEvaluator, name)
+
+Convert the metric `name` into an `NVPW_MetricEvalRequest` with
+`NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest` and store it in `data`.
+"""
+struct MetricEvalRequest
+    me::MetricsEvaluator
+    data::NVPW_MetricEvalRequest
+
+    function MetricEvalRequest(me::MetricsEvaluator, name)
+        eval_request = Ref{NVPW_MetricEvalRequest}()
+        GC.@preserve me name eval_request begin
+            params = Ref(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params(
+                NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), pointer(name),
+                Base.unsafe_convert(Ptr{NVPW_MetricEvalRequest}, eval_request), NVPW_MetricEvalRequest_STRUCT_SIZE))
+            NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(params)
+            return new(me, eval_request[])
+        end
+    end
+end