From 1c5ae57227e894175a82a16570994ebf444337f6 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Thu, 9 Nov 2017 15:43:14 +0000 Subject: [PATCH] added LWS/GWS per kernel printing during openCL profile --- CHANGES | 1 + module/program/module.py | 66 +++++++++++++------ script/ctuning.process.dvdt-prof/dvdt_prof.py | 6 +- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/CHANGES b/CHANGES index ff631c7..fc92d5e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,6 @@ * 2017.11.09 - added ondemand CPU/GPU freq setting after experiments to "calm" system to avoid overheating on embedded/IoT devices + - print LWS and GWS for OpenCL kernel profiles * 2017.11.08 - added script:ctuning.process.dvdt-prof to process raw OpenCL stats from dvdt prof diff --git a/module/program/module.py b/module/program/module.py index c59747f..2326d73 100644 --- a/module/program/module.py +++ b/module/program/module.py @@ -6235,45 +6235,71 @@ def benchmark(i): ck.out(' '+kernel+' : '+str(tmin)+' .. '+str(tmax)) # Check if sequence of OpenCL kernel time and rebuild sequence - kernels_min={} - kernels_max={} + kernels={} for k in flat: if k.startswith('##characteristics#run#execution_time_list_opencl') and k.endswith('#min'): - tmin=flat[k] - k1=k[:-3]+'max' - tmax=flat.get(k1,tmin) - j=k.find('#',49) if j>0: num=k[49:j] + if num not in kernels: + kernels[num]={'lws':{},'gws':{}} + j=k.find('#',49) if j>0: x=k[j+1:-4] - if num not in kernels_min: - kernels_min[num]={} - kernels_max[num]={} + v=flat[k] + + if x=='kernel_time': + kernels[num]['kernel_time_min']=v + + k1=k[:-3]+'max' + kernels[num]['kernel_time_max']=flat.get(k1,v) - kernels_min[num][x]=tmin - kernels_max[num][x]=tmax + elif x.startswith('lws@') or x.startswith('gws@'): + x1=x[4:] + kernels[num][x[:3]][x1]=v + else: + kernels[num][x]=v - if len(kernels_min)>0: + if len(kernels)>0: ck.out('') ck.out('* OpenCL aggregated kernel times in us. (min .. max):') ck.out('') - for q in sorted(kernels_min, key=lambda v: kernels_min[v]['sequence']): - qmin=kernels_min[q] - qmax=kernels_max[q] + for q in sorted(kernels, key=lambda v: kernels[v]['sequence']): + qq=kernels[q] + + kernel=qq['kernel_name'] + sec=qq['sequence'] + + tmin=qq['kernel_time_min']*1e-3 + tmax=qq['kernel_time_max']*1e-3 + + lws=qq['lws'] + gws=qq['gws'] - kernel=qmin['kernel_name'] - sec=qmin['sequence'] + xlws='' + for j in sorted(lws): + if xlws!='': xlws+=',' + xlws+=str(lws[j]) - tmin=qmin['kernel_time']*1e-3 - tmax=qmax['kernel_time']*1e-3 + xgws='' + for j in sorted(gws): + if xgws!='': xgws+=',' + xgws+=str(gws[j]) - ck.out(' '+str(sec)+') '+kernel+' : '+str(tmin)+' .. '+str(tmax)) + x='' + if xlws!='': x+='LWS='+xlws + if xgws!='': + if x!='': x+=' ' + x+='GWS='+xgws + if x!='': x=' ('+x+')' + + ck.out(' '+str(sec)+') '+kernel+' : '+str(tmin)+' .. '+str(tmax)+x) + + ck.out('') return r diff --git a/script/ctuning.process.dvdt-prof/dvdt_prof.py b/script/ctuning.process.dvdt-prof/dvdt_prof.py index c2e6593..7fe11a0 100644 --- a/script/ctuning.process.dvdt-prof/dvdt_prof.py +++ b/script/ctuning.process.dvdt-prof/dvdt_prof.py @@ -92,7 +92,11 @@ def process(i): kernel_name=nq['name'] kernel_time=nq['profiling']['end']-nq['profiling']['start'] - d['execution_time_list_opencl'].append({'kernel_name':kernel_name, 'kernel_time':kernel_time, 'sequence':seq}) + d['execution_time_list_opencl'].append({'kernel_name':kernel_name, + 'kernel_time':kernel_time, + 'sequence':seq, + 'lws':nq.get('lws',[]), + 'gws':nq.get('gws',[])}) if kernel_name not in d['execution_time_opencl_us']: d['execution_time_opencl_us'][kernel_name]=0.0